Fix PR#.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob1a1e58b1a817717880788f8c5a26b849304f667b
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
3 Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this constant (the LOOP argument is currently unused).  */
#define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
105 struct iv
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
117 struct version_info
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
135 /* Cost of a computation. */
136 typedef struct
138 unsigned cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
143 } comp_cost;
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
148 /* The candidate - cost pair. */
149 struct cost_pair
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
154 preserved. */
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
160 /* Use. */
161 struct iv_use
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
166 gimple stmt; /* Statement in that it occurs. */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
169 important ones. */
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand *selected;
176 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
187 /* The induction variable candidate. */
188 struct iv_cand
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
194 gimple incremented_at;/* For original biv, the statement where it is
195 incremented. */
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
203 bitmap depends_on; /* The list of invariants that are used in step of the
204 biv. */
207 /* The data used by the induction variable optimizations. */
209 typedef struct iv_use *iv_use_p;
210 DEF_VEC_P(iv_use_p);
211 DEF_VEC_ALLOC_P(iv_use_p,heap);
213 typedef struct iv_cand *iv_cand_p;
214 DEF_VEC_P(iv_cand_p);
215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
217 struct ivopts_data
219 /* The currently optimized loop. */
220 struct loop *current_loop;
222 /* Number of registers used in it. */
223 unsigned regs_used;
225 /* Numbers of iterations for all exits of the current loop. */
226 struct pointer_map_t *niters;
228 /* The size of version_info array allocated. */
229 unsigned version_info_size;
231 /* The array of information for the ssa names. */
232 struct version_info *version_info;
234 /* The bitmap of indices in version_info whose value was changed. */
235 bitmap relevant;
237 /* The maximum invariant id. */
238 unsigned max_inv_id;
240 /* The uses of induction variables. */
241 VEC(iv_use_p,heap) *iv_uses;
243 /* The candidates. */
244 VEC(iv_cand_p,heap) *iv_candidates;
246 /* A bitmap of important candidates. */
247 bitmap important_candidates;
249 /* Whether to consider just related and important candidates when replacing a
250 use. */
251 bool consider_all_candidates;
254 /* An assignment of iv candidates to uses. */
256 struct iv_ca
258 /* The number of uses covered by the assignment. */
259 unsigned upto;
261 /* Number of uses that cannot be expressed by the candidates in the set. */
262 unsigned bad_uses;
264 /* Candidate assigned to a use, together with the related costs. */
265 struct cost_pair **cand_for_use;
267 /* Number of times each candidate is used. */
268 unsigned *n_cand_uses;
270 /* The candidates used. */
271 bitmap cands;
273 /* The number of candidates in the set. */
274 unsigned n_cands;
276 /* Total number of registers needed. */
277 unsigned n_regs;
279 /* Total cost of expressing uses. */
280 comp_cost cand_use_cost;
282 /* Total cost of candidates. */
283 unsigned cand_cost;
285 /* Number of times each invariant is used. */
286 unsigned *n_invariant_uses;
288 /* Total cost of the assignment. */
289 comp_cost cost;
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
309 /* Bound on number of candidates below that all candidates are considered. */
311 #define CONSIDER_ALL_CANDIDATES_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
314 /* If there are more iv occurrences, we just give up (it is quite unlikely that
315 optimizing such a loop would help, and it would take ages). */
317 #define MAX_CONSIDERED_USES \
318 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
320 /* If there are at most this number of ivs in the set, try removing unnecessary
321 ivs from the set always. */
323 #define ALWAYS_PRUNE_CAND_SET_BOUND \
324 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
326 /* The list of trees for that the decl_rtl field must be reset is stored
327 here. */
329 static VEC(tree,heap) *decl_rtl_to_reset;
331 /* Number of uses recorded in DATA. */
333 static inline unsigned
334 n_iv_uses (struct ivopts_data *data)
336 return VEC_length (iv_use_p, data->iv_uses);
339 /* Ith use recorded in DATA. */
341 static inline struct iv_use *
342 iv_use (struct ivopts_data *data, unsigned i)
344 return VEC_index (iv_use_p, data->iv_uses, i);
347 /* Number of candidates recorded in DATA. */
349 static inline unsigned
350 n_iv_cands (struct ivopts_data *data)
352 return VEC_length (iv_cand_p, data->iv_candidates);
355 /* Ith candidate recorded in DATA. */
357 static inline struct iv_cand *
358 iv_cand (struct ivopts_data *data, unsigned i)
360 return VEC_index (iv_cand_p, data->iv_candidates, i);
363 /* The single loop exit if it dominates the latch, NULL otherwise. */
365 edge
366 single_dom_exit (struct loop *loop)
368 edge exit = single_exit (loop);
370 if (!exit)
371 return NULL;
373 if (!just_once_each_iteration_p (loop, exit->src))
374 return NULL;
376 return exit;
379 /* Dumps information about the induction variable IV to FILE. */
381 extern void dump_iv (FILE *, struct iv *);
382 void
383 dump_iv (FILE *file, struct iv *iv)
385 if (iv->ssa_name)
387 fprintf (file, "ssa name ");
388 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
389 fprintf (file, "\n");
392 fprintf (file, " type ");
393 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
394 fprintf (file, "\n");
396 if (iv->step)
398 fprintf (file, " base ");
399 print_generic_expr (file, iv->base, TDF_SLIM);
400 fprintf (file, "\n");
402 fprintf (file, " step ");
403 print_generic_expr (file, iv->step, TDF_SLIM);
404 fprintf (file, "\n");
406 else
408 fprintf (file, " invariant ");
409 print_generic_expr (file, iv->base, TDF_SLIM);
410 fprintf (file, "\n");
413 if (iv->base_object)
415 fprintf (file, " base object ");
416 print_generic_expr (file, iv->base_object, TDF_SLIM);
417 fprintf (file, "\n");
420 if (iv->biv_p)
421 fprintf (file, " is a biv\n");
424 /* Dumps information about the USE to FILE. */
426 extern void dump_use (FILE *, struct iv_use *);
427 void
428 dump_use (FILE *file, struct iv_use *use)
430 fprintf (file, "use %d\n", use->id);
432 switch (use->type)
434 case USE_NONLINEAR_EXPR:
435 fprintf (file, " generic\n");
436 break;
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
446 default:
447 gcc_unreachable ();
450 fprintf (file, " in statement ");
451 print_gimple_stmt (file, use->stmt, 0, 0);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
474 unsigned i;
475 struct iv_use *use;
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
503 if (!iv)
505 fprintf (file, " final value replacement\n");
506 return;
509 switch (cand->pos)
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
524 dump_iv (file, iv);
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
544 emitted in LOOP. */
546 static bool
547 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
549 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
551 gcc_assert (bb);
553 if (sbb == loop->latch)
554 return true;
556 if (sbb != bb)
557 return false;
559 return stmt == last_stmt (bb);
562 /* Returns true if STMT if after the place where the original induction
563 variable CAND is incremented. */
565 static bool
566 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
568 basic_block cand_bb = gimple_bb (cand->incremented_at);
569 basic_block stmt_bb = gimple_bb (stmt);
570 gimple_stmt_iterator bsi;
572 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
573 return false;
575 if (stmt_bb != cand_bb)
576 return true;
578 /* Scan the block from the end, since the original ivs are usually
579 incremented at the end of the loop body. */
580 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
582 if (gsi_stmt (bsi) == cand->incremented_at)
583 return false;
584 if (gsi_stmt (bsi) == stmt)
585 return true;
589 /* Returns true if STMT if after the place where the induction variable
590 CAND is incremented in LOOP. */
592 static bool
593 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
595 switch (cand->pos)
597 case IP_END:
598 return false;
600 case IP_NORMAL:
601 return stmt_after_ip_normal_pos (loop, stmt);
603 case IP_ORIGINAL:
604 return stmt_after_ip_original_pos (cand, stmt);
606 default:
607 gcc_unreachable ();
611 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
613 static bool
614 abnormal_ssa_name_p (tree exp)
616 if (!exp)
617 return false;
619 if (TREE_CODE (exp) != SSA_NAME)
620 return false;
622 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
625 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
626 abnormal phi node. Callback for for_each_index. */
628 static bool
629 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
630 void *data ATTRIBUTE_UNUSED)
632 if (TREE_CODE (base) == ARRAY_REF)
634 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
635 return false;
636 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
637 return false;
640 return !abnormal_ssa_name_p (*index);
643 /* Returns true if EXPR contains a ssa name that occurs in an
644 abnormal phi node. */
646 bool
647 contains_abnormal_ssa_name_p (tree expr)
649 enum tree_code code;
650 enum tree_code_class codeclass;
652 if (!expr)
653 return false;
655 code = TREE_CODE (expr);
656 codeclass = TREE_CODE_CLASS (code);
658 if (code == SSA_NAME)
659 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
661 if (code == INTEGER_CST
662 || is_gimple_min_invariant (expr))
663 return false;
665 if (code == ADDR_EXPR)
666 return !for_each_index (&TREE_OPERAND (expr, 0),
667 idx_contains_abnormal_ssa_name_p,
668 NULL);
670 switch (codeclass)
672 case tcc_binary:
673 case tcc_comparison:
674 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
675 return true;
677 /* Fallthru. */
678 case tcc_unary:
679 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
680 return true;
682 break;
684 default:
685 gcc_unreachable ();
688 return false;
691 /* Returns tree describing number of iterations determined from
692 EXIT of DATA->current_loop, or NULL if something goes wrong. */
694 static tree
695 niter_for_exit (struct ivopts_data *data, edge exit)
697 struct tree_niter_desc desc;
698 tree niter;
699 void **slot;
701 if (!data->niters)
703 data->niters = pointer_map_create ();
704 slot = NULL;
706 else
707 slot = pointer_map_contains (data->niters, exit);
709 if (!slot)
711 /* Try to determine number of iterations. We must know it
712 unconditionally (i.e., without possibility of # of iterations
713 being zero). Also, we cannot safely work with ssa names that
714 appear in phi nodes on abnormal edges, so that we do not create
715 overlapping life ranges for them (PR 27283). */
716 if (number_of_iterations_exit (data->current_loop,
717 exit, &desc, true)
718 && integer_zerop (desc.may_be_zero)
719 && !contains_abnormal_ssa_name_p (desc.niter))
720 niter = desc.niter;
721 else
722 niter = NULL_TREE;
724 *pointer_map_insert (data->niters, exit) = niter;
726 else
727 niter = (tree) *slot;
729 return niter;
732 /* Returns tree describing number of iterations determined from
733 single dominating exit of DATA->current_loop, or NULL if something
734 goes wrong. */
736 static tree
737 niter_for_single_dom_exit (struct ivopts_data *data)
739 edge exit = single_dom_exit (data->current_loop);
741 if (!exit)
742 return NULL;
744 return niter_for_exit (data, exit);
747 /* Initializes data structures used by the iv optimization pass, stored
748 in DATA. */
750 static void
751 tree_ssa_iv_optimize_init (struct ivopts_data *data)
753 data->version_info_size = 2 * num_ssa_names;
754 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
755 data->relevant = BITMAP_ALLOC (NULL);
756 data->important_candidates = BITMAP_ALLOC (NULL);
757 data->max_inv_id = 0;
758 data->niters = NULL;
759 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
760 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
761 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
764 /* Returns a memory object to that EXPR points. In case we are able to
765 determine that it does not point to any such object, NULL is returned. */
767 static tree
768 determine_base_object (tree expr)
770 enum tree_code code = TREE_CODE (expr);
771 tree base, obj;
773 /* If this is a pointer casted to any type, we need to determine
774 the base object for the pointer; so handle conversions before
775 throwing away non-pointer expressions. */
776 if (CONVERT_EXPR_P (expr))
777 return determine_base_object (TREE_OPERAND (expr, 0));
779 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
780 return NULL_TREE;
782 switch (code)
784 case INTEGER_CST:
785 return NULL_TREE;
787 case ADDR_EXPR:
788 obj = TREE_OPERAND (expr, 0);
789 base = get_base_address (obj);
791 if (!base)
792 return expr;
794 if (TREE_CODE (base) == INDIRECT_REF)
795 return determine_base_object (TREE_OPERAND (base, 0));
797 return fold_convert (ptr_type_node,
798 build_fold_addr_expr (base));
800 case POINTER_PLUS_EXPR:
801 return determine_base_object (TREE_OPERAND (expr, 0));
803 case PLUS_EXPR:
804 case MINUS_EXPR:
805 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
806 gcc_unreachable ();
808 default:
809 return fold_convert (ptr_type_node, expr);
813 /* Allocates an induction variable with given initial value BASE and step STEP
814 for loop LOOP. */
816 static struct iv *
817 alloc_iv (tree base, tree step)
819 struct iv *iv = XCNEW (struct iv);
820 gcc_assert (step != NULL_TREE);
822 iv->base = base;
823 iv->base_object = determine_base_object (base);
824 iv->step = step;
825 iv->biv_p = false;
826 iv->have_use_for = false;
827 iv->use_id = 0;
828 iv->ssa_name = NULL_TREE;
830 return iv;
833 /* Sets STEP and BASE for induction variable IV. */
835 static void
836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
838 struct version_info *info = name_info (data, iv);
840 gcc_assert (!info->iv);
842 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
843 info->iv = alloc_iv (base, step);
844 info->iv->ssa_name = iv;
847 /* Finds induction variable declaration for VAR. */
849 static struct iv *
850 get_iv (struct ivopts_data *data, tree var)
852 basic_block bb;
853 tree type = TREE_TYPE (var);
855 if (!POINTER_TYPE_P (type)
856 && !INTEGRAL_TYPE_P (type))
857 return NULL;
859 if (!name_info (data, var)->iv)
861 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
863 if (!bb
864 || !flow_bb_inside_loop_p (data->current_loop, bb))
865 set_iv (data, var, var, build_int_cst (type, 0));
868 return name_info (data, var)->iv;
871 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
872 not define a simple affine biv with nonzero step. */
874 static tree
875 determine_biv_step (gimple phi)
877 struct loop *loop = gimple_bb (phi)->loop_father;
878 tree name = PHI_RESULT (phi);
879 affine_iv iv;
881 if (!is_gimple_reg (name))
882 return NULL_TREE;
884 if (!simple_iv (loop, phi, name, &iv, true))
885 return NULL_TREE;
887 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
890 /* Finds basic ivs. */
892 static bool
893 find_bivs (struct ivopts_data *data)
895 gimple phi;
896 tree step, type, base;
897 bool found = false;
898 struct loop *loop = data->current_loop;
899 gimple_stmt_iterator psi;
901 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
903 phi = gsi_stmt (psi);
905 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
906 continue;
908 step = determine_biv_step (phi);
909 if (!step)
910 continue;
912 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
913 base = expand_simple_operations (base);
914 if (contains_abnormal_ssa_name_p (base)
915 || contains_abnormal_ssa_name_p (step))
916 continue;
918 type = TREE_TYPE (PHI_RESULT (phi));
919 base = fold_convert (type, base);
920 if (step)
922 if (POINTER_TYPE_P (type))
923 step = fold_convert (sizetype, step);
924 else
925 step = fold_convert (type, step);
928 set_iv (data, PHI_RESULT (phi), base, step);
929 found = true;
932 return found;
935 /* Marks basic ivs. */
937 static void
938 mark_bivs (struct ivopts_data *data)
940 gimple phi;
941 tree var;
942 struct iv *iv, *incr_iv;
943 struct loop *loop = data->current_loop;
944 basic_block incr_bb;
945 gimple_stmt_iterator psi;
947 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
949 phi = gsi_stmt (psi);
951 iv = get_iv (data, PHI_RESULT (phi));
952 if (!iv)
953 continue;
955 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
956 incr_iv = get_iv (data, var);
957 if (!incr_iv)
958 continue;
960 /* If the increment is in the subloop, ignore it. */
961 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
962 if (incr_bb->loop_father != data->current_loop
963 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
964 continue;
966 iv->biv_p = true;
967 incr_iv->biv_p = true;
971 /* Checks whether STMT defines a linear induction variable and stores its
972 parameters to IV. */
974 static bool
975 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
977 tree lhs;
978 struct loop *loop = data->current_loop;
980 iv->base = NULL_TREE;
981 iv->step = NULL_TREE;
983 if (gimple_code (stmt) != GIMPLE_ASSIGN)
984 return false;
986 lhs = gimple_assign_lhs (stmt);
987 if (TREE_CODE (lhs) != SSA_NAME)
988 return false;
990 if (!simple_iv (loop, stmt, lhs, iv, true))
991 return false;
992 iv->base = expand_simple_operations (iv->base);
994 if (contains_abnormal_ssa_name_p (iv->base)
995 || contains_abnormal_ssa_name_p (iv->step))
996 return false;
998 return true;
1001 /* Finds general ivs in statement STMT. */
1003 static void
1004 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1006 affine_iv iv;
1008 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1009 return;
1011 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1014 /* Finds general ivs in basic block BB. */
1016 static void
1017 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1019 gimple_stmt_iterator bsi;
1021 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1022 find_givs_in_stmt (data, gsi_stmt (bsi));
1025 /* Finds general ivs. */
1027 static void
1028 find_givs (struct ivopts_data *data)
1030 struct loop *loop = data->current_loop;
1031 basic_block *body = get_loop_body_in_dom_order (loop);
1032 unsigned i;
1034 for (i = 0; i < loop->num_nodes; i++)
1035 find_givs_in_bb (data, body[i]);
1036 free (body);
1039 /* For each ssa name defined in LOOP determines whether it is an induction
1040 variable and if so, its initial value and step. */
1042 static bool
1043 find_induction_variables (struct ivopts_data *data)
1045 unsigned i;
1046 bitmap_iterator bi;
1048 if (!find_bivs (data))
1049 return false;
1051 find_givs (data);
1052 mark_bivs (data);
1054 if (dump_file && (dump_flags & TDF_DETAILS))
1056 tree niter = niter_for_single_dom_exit (data);
1058 if (niter)
1060 fprintf (dump_file, " number of iterations ");
1061 print_generic_expr (dump_file, niter, TDF_SLIM);
1062 fprintf (dump_file, "\n\n");
1065 fprintf (dump_file, "Induction variables:\n\n");
1067 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1069 if (ver_info (data, i)->iv)
1070 dump_iv (dump_file, ver_info (data, i)->iv);
1074 return true;
1077 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1079 static struct iv_use *
1080 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1081 gimple stmt, enum use_type use_type)
1083 struct iv_use *use = XCNEW (struct iv_use);
1085 use->id = n_iv_uses (data);
1086 use->type = use_type;
1087 use->iv = iv;
1088 use->stmt = stmt;
1089 use->op_p = use_p;
1090 use->related_cands = BITMAP_ALLOC (NULL);
1092 /* To avoid showing ssa name in the dumps, if it was not reset by the
1093 caller. */
1094 iv->ssa_name = NULL_TREE;
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 dump_use (dump_file, use);
1099 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1101 return use;
1104 /* Checks whether OP is a loop-level invariant and if so, records it.
1105 NONLINEAR_USE is true if the invariant is used in a way we do not
1106 handle specially. */
1108 static void
1109 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1111 basic_block bb;
1112 struct version_info *info;
1114 if (TREE_CODE (op) != SSA_NAME
1115 || !is_gimple_reg (op))
1116 return;
1118 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1119 if (bb
1120 && flow_bb_inside_loop_p (data->current_loop, bb))
1121 return;
1123 info = name_info (data, op);
1124 info->name = op;
1125 info->has_nonlin_use |= nonlinear_use;
1126 if (!info->inv_id)
1127 info->inv_id = ++data->max_inv_id;
1128 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1131 /* Checks whether the use OP is interesting and if so, records it. */
1133 static struct iv_use *
1134 find_interesting_uses_op (struct ivopts_data *data, tree op)
1136 struct iv *iv;
1137 struct iv *civ;
1138 gimple stmt;
1139 struct iv_use *use;
1141 if (TREE_CODE (op) != SSA_NAME)
1142 return NULL;
1144 iv = get_iv (data, op);
1145 if (!iv)
1146 return NULL;
1148 if (iv->have_use_for)
1150 use = iv_use (data, iv->use_id);
1152 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1153 return use;
1156 if (integer_zerop (iv->step))
1158 record_invariant (data, op, true);
1159 return NULL;
1161 iv->have_use_for = true;
1163 civ = XNEW (struct iv);
1164 *civ = *iv;
1166 stmt = SSA_NAME_DEF_STMT (op);
1167 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1168 || is_gimple_assign (stmt));
1170 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1171 iv->use_id = use->id;
1173 return use;
1176 /* Given a condition in statement STMT, checks whether it is a compare
1177 of an induction variable and an invariant. If this is the case,
1178 CONTROL_VAR is set to location of the iv, BOUND to the location of
1179 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1180 induction variable descriptions, and true is returned. If this is not
1181 the case, CONTROL_VAR and BOUND are set to the arguments of the
1182 condition and false is returned. */
1184 static bool
1185 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1186 tree **control_var, tree **bound,
1187 struct iv **iv_var, struct iv **iv_bound)
1189 /* The objects returned when COND has constant operands. */
1190 static struct iv const_iv;
1191 static tree zero;
1192 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1193 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1194 bool ret = false;
1196 if (gimple_code (stmt) == GIMPLE_COND)
1198 op0 = gimple_cond_lhs_ptr (stmt);
1199 op1 = gimple_cond_rhs_ptr (stmt);
1201 else
1203 op0 = gimple_assign_rhs1_ptr (stmt);
1204 op1 = gimple_assign_rhs2_ptr (stmt);
1207 zero = integer_zero_node;
1208 const_iv.step = integer_zero_node;
1210 if (TREE_CODE (*op0) == SSA_NAME)
1211 iv0 = get_iv (data, *op0);
1212 if (TREE_CODE (*op1) == SSA_NAME)
1213 iv1 = get_iv (data, *op1);
1215 /* Exactly one of the compared values must be an iv, and the other one must
1216 be an invariant. */
1217 if (!iv0 || !iv1)
1218 goto end;
1220 if (integer_zerop (iv0->step))
1222 /* Control variable may be on the other side. */
1223 tmp_op = op0; op0 = op1; op1 = tmp_op;
1224 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1226 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1228 end:
1229 if (control_var)
1230 *control_var = op0;;
1231 if (iv_var)
1232 *iv_var = iv0;;
1233 if (bound)
1234 *bound = op1;
1235 if (iv_bound)
1236 *iv_bound = iv1;
1238 return ret;
1241 /* Checks whether the condition in STMT is interesting and if so,
1242 records it. */
1244 static void
1245 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1247 tree *var_p, *bound_p;
1248 struct iv *var_iv, *civ;
1250 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1252 find_interesting_uses_op (data, *var_p);
1253 find_interesting_uses_op (data, *bound_p);
1254 return;
1257 civ = XNEW (struct iv);
1258 *civ = *var_iv;
1259 record_use (data, NULL, civ, stmt, USE_COMPARE);
1262 /* Returns true if expression EXPR is obviously invariant in LOOP,
1263 i.e. if all its operands are defined outside of the LOOP. LOOP
1264 should not be the function body. */
1266 bool
1267 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1269 basic_block def_bb;
1270 unsigned i, len;
1272 gcc_assert (loop_depth (loop) > 0);
1274 if (is_gimple_min_invariant (expr))
1275 return true;
1277 if (TREE_CODE (expr) == SSA_NAME)
1279 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1280 if (def_bb
1281 && flow_bb_inside_loop_p (loop, def_bb))
1282 return false;
1284 return true;
1287 if (!EXPR_P (expr))
1288 return false;
1290 len = TREE_OPERAND_LENGTH (expr);
1291 for (i = 0; i < len; i++)
1292 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1293 return false;
1295 return true;
1298 /* Returns true if statement STMT is obviously invariant in LOOP,
1299 i.e. if all its operands on the RHS are defined outside of the LOOP.
1300 LOOP should not be the function body. */
1302 bool
1303 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1305 unsigned i;
1306 tree lhs;
1308 gcc_assert (loop_depth (loop) > 0);
1310 lhs = gimple_get_lhs (stmt);
1311 for (i = 0; i < gimple_num_ops (stmt); i++)
1313 tree op = gimple_op (stmt, i);
1314 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1315 return false;
1318 return true;
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* Per-loop ivopts context.  */
  gimple stmt;				/* Statement the reference occurs in.  */
  tree step;				/* Accumulated step, in sizetype.  */
};
/* Callback for for_each_index; see the comment above struct
   ifs_ivopts_data for the contract.  */

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* Give up on references whose alignment semantics we cannot model.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-SSA index is left alone; nothing to accumulate.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  /* Replace the index by the initial value of its iv.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  /* Accumulate the byte-sized step of this index into the total step.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
1409 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1410 object is passed to it in DATA. */
1412 static bool
1413 idx_record_use (tree base, tree *idx,
1414 void *vdata)
1416 struct ivopts_data *data = (struct ivopts_data *) vdata;
1417 find_interesting_uses_op (data, *idx);
1418 if (TREE_CODE (base) == ARRAY_REF)
1420 find_interesting_uses_op (data, array_ref_element_size (base));
1421 find_interesting_uses_op (data, array_ref_low_bound (base));
1423 return true;
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  /* All arithmetic below is reduced to this bit width of TOP's type.  */
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* Structurally identical expressions: multiple is 1.  */
  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* (X * C) is a multiple of BOT if X is.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* A sum/difference is a multiple if both parts are.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      /* TOP / BOT is the multiple only when the division is exact
	 (remainder RES is zero).  */
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
			       GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      /* Unaligned if the base object or the bit position breaks the
	 mode's alignment requirement.  */
      if (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0)
	return true;

      /* Also unaligned unless STEP is provably a multiple of the
	 alignment (MUL itself is not used further).  */
      if (!constant_multiple_of (step, al, &mul))
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      /* Non-addressable if the field is marked so, or if the record
	 it lives in is.  */
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      /* Conservatively assume any conversion hides a non-addressable
	 object.  */
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  On success records an address
   use; otherwise falls back to recording the individual indices as
   ordinary uses.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Substitute the ivs of the TMR's base and index, accumulating
	 their steps into STEP.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  /* Every SSA use operand is a potential invariant; record_invariant
     filters out the ones that are not.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  /* A plain copy or load/store: memory operands become address
	     uses, register operands become ordinary uses.  */
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      /* A phi in the loop header defining a biv: its arguments are not
	 interesting uses on their own.  */
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  /* Fall back: record every SSA use that is itself an iv.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  /* Values live across EXIT appear as phi arguments in the destination
     block; each such SSA register value is a use.  */
  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Edges leaving the loop give rise to uses outside of it.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      /* Scan phis first, then ordinary statements.  */
      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      /* The whole constant becomes the offset; replace it by zero.  */
      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      /* Strip both sides, then combine the collected offsets.  */
      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      /* Simplify away operands reduced to zero.  */
      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* The offset contribution of the index is index * element size.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      /* Inside an ADDR_EXPR we may strip top-level component refs.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      /* A dereference leaves "address context".  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.
   Convenience wrapper around strip_offset_1 for top-level,
   non-address expressions.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
2007 /* Returns variant of TYPE that can be used as base for different uses.
2008 We return unsigned type with the same precision, which avoids problems
2009 with overflows. */
2011 static tree
2012 generic_type_for (tree type)
2014 if (POINTER_TYPE_P (type))
2015 return unsigned_type_for (type);
2017 if (TYPE_UNSIGNED (type))
2018 return type;
2020 return unsigned_type_for (type);
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

/* Context for find_depends; walk_tree's DATA slot carries the bitmap,
   so the ivopts data is passed through this file-scope variable.  */
static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  /* Only record names already classified as invariants with no
     nonlinear use.  */
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  /* Allocate the bitmap lazily on first recorded dependency.  */
  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      /* Don't convert the base to the generic type for pointers as the generic
	 type is an integer type with the same size as the pointer type.  */
      if (type != orig_type && !POINTER_TYPE_P (orig_type))
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  /* Look for an existing candidate with the same position, increment
     statement and iv, to avoid duplicates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  /* Not found; create and register a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* A non-constant step may depend on loop invariants; record them.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* An existing candidate may be promoted to important.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2149 /* Returns true if incrementing the induction variable at the end of the LOOP
2150 is allowed.
2152 The purpose is to avoid splitting latch edge with a biv increment, thus
2153 creating a jump, possibly confusing other optimization passes and leaving
2154 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2155 is not available (so we do not have a better alternative), or if the latch
2156 edge is already nonempty. */
2158 static bool
2159 allow_ip_end_pos_p (struct loop *loop)
2161 if (!ip_normal_pos (loop))
2162 return true;
2164 if (!empty_block_p (ip_end_pos (loop)))
2165 return true;
2167 return false;
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  /* Schedule the increment before the exit test when possible ...  */
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  /* ... and/or at the end of the latch when that does not force an
     extra jump (see allow_ip_end_pos_p).  */
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  /* Use an unsigned type of the requested width for the counter.  */
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  /* Walk all relevant SSA versions, picking the non-degenerate bivs.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  /* With few enough candidates we can afford to try every (use, cand)
     pair; above the bound we restrict each use to its related set.  */
  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  /* Count the related candidates ...  */
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that moduling by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      /* XCNEWVEC zero-initializes, so empty slots have .cand == NULL.  */
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
2406 /* Returns description of computation cost of expression whose runtime
2407 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2409 static comp_cost
2410 new_cost (unsigned runtime, unsigned complexity)
2412 comp_cost cost;
2414 cost.cost = runtime;
2415 cost.complexity = complexity;
2417 return cost;
/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  /* NOTE(review): infinite costs are not special-cased here, so adding
     to an INFTY cost changes its value — confirm callers never rely on
     infiniteness being preserved through addition.  */
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}
/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}
/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  /* Runtime cost dominates; complexity only breaks ties.  */
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  /* Infinite costs are never recorded; release the bitmap we own.  */
  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  /* Dense case: direct index by candidate id.  */
  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* Sparse case: open-addressing hash keyed by candidate id, with
     linear probing and wrap-around.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The map is sized to hold all related candidates, so a free slot
     must exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
2505 /* Gets cost of (USE, CANDIDATE) pair. */
2507 static struct cost_pair *
2508 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2509 struct iv_cand *cand)
2511 unsigned i, s;
2512 struct cost_pair *ret;
2514 if (!cand)
2515 return NULL;
2517 if (data->consider_all_candidates)
2519 ret = use->cost_map + cand->id;
2520 if (!ret->cand)
2521 return NULL;
2523 return ret;
2526 /* n_map_members is a power of two, so this computes modulo. */
2527 s = cand->id & (use->n_map_members - 1);
2528 for (i = s; i < use->n_map_members; i++)
2529 if (use->cost_map[i].cand == cand)
2530 return use->cost_map + i;
2532 for (i = 0; i < s; i++)
2533 if (use->cost_map[i].cand == cand)
2534 return use->cost_map + i;
2536 return NULL;
2539 /* Returns estimate on cost of computing SEQ. */
2541 static unsigned
2542 seq_cost (rtx seq)
2544 unsigned cost = 0;
2545 rtx set;
2547 for (; seq; seq = NEXT_INSN (seq))
2549 set = single_set (seq);
2550 if (set)
2551 cost += rtx_cost (set, SET);
2552 else
2553 cost++;
2556 return cost;
2559 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2560 static rtx
2561 produce_memory_decl_rtl (tree obj, int *regno)
2563 rtx x;
2565 gcc_assert (obj);
2566 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2568 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2569 x = gen_rtx_SYMBOL_REF (Pmode, name);
2570 SET_SYMBOL_REF_DECL (x, obj);
2571 x = gen_rtx_MEM (DECL_MODE (obj), x);
2572 targetm.encode_section_info (obj, x, true);
2574 else
2576 x = gen_raw_REG (Pmode, (*regno)++);
2577 x = gen_rtx_MEM (DECL_MODE (obj), x);
2580 return x;
2583 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2584 walk_tree. DATA contains the actual fake register number. */
2586 static tree
2587 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2589 tree obj = NULL_TREE;
2590 rtx x = NULL_RTX;
2591 int *regno = (int *) data;
2593 switch (TREE_CODE (*expr_p))
2595 case ADDR_EXPR:
2596 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2597 handled_component_p (*expr_p);
2598 expr_p = &TREE_OPERAND (*expr_p, 0))
2599 continue;
2600 obj = *expr_p;
2601 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2602 x = produce_memory_decl_rtl (obj, regno);
2603 break;
2605 case SSA_NAME:
2606 *ws = 0;
2607 obj = SSA_NAME_VAR (*expr_p);
2608 if (!DECL_RTL_SET_P (obj))
2609 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2610 break;
2612 case VAR_DECL:
2613 case PARM_DECL:
2614 case RESULT_DECL:
2615 *ws = 0;
2616 obj = *expr_p;
2618 if (DECL_RTL_SET_P (obj))
2619 break;
2621 if (DECL_MODE (obj) == BLKmode)
2622 x = produce_memory_decl_rtl (obj, regno);
2623 else
2624 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2626 break;
2628 default:
2629 break;
2632 if (x)
2634 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2635 SET_DECL_RTL (obj, x);
2638 return NULL_TREE;
2641 /* Determines cost of the computation of EXPR. */
2643 static unsigned
2644 computation_cost (tree expr)
2646 rtx seq, rslt;
2647 tree type = TREE_TYPE (expr);
2648 unsigned cost;
2649 /* Avoid using hard regs in ways which may be unsupported. */
2650 int regno = LAST_VIRTUAL_REGISTER + 1;
2652 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2653 start_sequence ();
2654 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2655 seq = get_insns ();
2656 end_sequence ();
2658 cost = seq_cost (seq);
2659 if (MEM_P (rslt))
2660 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2662 return cost;
2665 /* Returns variable containing the value of candidate CAND at statement AT. */
2667 static tree
2668 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2670 if (stmt_after_increment (loop, cand, stmt))
2671 return cand->var_after;
2672 else
2673 return cand->var_before;
2676 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2677 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2680 tree_int_cst_sign_bit (const_tree t)
2682 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2683 unsigned HOST_WIDE_INT w;
2685 if (bitno < HOST_BITS_PER_WIDE_INT)
2686 w = TREE_INT_CST_LOW (t);
2687 else
2689 w = TREE_INT_CST_HIGH (t);
2690 bitno -= HOST_BITS_PER_WIDE_INT;
2693 return (w >> bitno) & 1;
2696 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2697 same precision that is at least as wide as the precision of TYPE, stores
2698 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2699 type of A and B. */
2701 static tree
2702 determine_common_wider_type (tree *a, tree *b)
2704 tree wider_type = NULL;
2705 tree suba, subb;
2706 tree atype = TREE_TYPE (*a);
2708 if (CONVERT_EXPR_P (*a))
2710 suba = TREE_OPERAND (*a, 0);
2711 wider_type = TREE_TYPE (suba);
2712 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2713 return atype;
2715 else
2716 return atype;
2718 if (CONVERT_EXPR_P (*b))
2720 subb = TREE_OPERAND (*b, 0);
2721 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2722 return atype;
2724 else
2725 return atype;
2727 *a = suba;
2728 *b = subb;
2729 return wider_type;
2732 /* Determines the expression by that USE is expressed from induction variable
2733 CAND at statement AT in LOOP. The expression is stored in a decomposed
2734 form into AFF. Returns false if USE cannot be expressed using CAND. */
2736 static bool
2737 get_computation_aff (struct loop *loop,
2738 struct iv_use *use, struct iv_cand *cand, gimple at,
2739 struct affine_tree_combination *aff)
2741 tree ubase = use->iv->base;
2742 tree ustep = use->iv->step;
2743 tree cbase = cand->iv->base;
2744 tree cstep = cand->iv->step, cstep_common;
2745 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2746 tree common_type, var;
2747 tree uutype;
2748 aff_tree cbase_aff, var_aff;
2749 double_int rat;
2751 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2753 /* We do not have a precision to express the values of use. */
2754 return false;
2757 var = var_at_stmt (loop, cand, at);
2758 uutype = unsigned_type_for (utype);
2760 /* If the conversion is not noop, perform it. */
2761 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2763 cstep = fold_convert (uutype, cstep);
2764 cbase = fold_convert (uutype, cbase);
2765 var = fold_convert (uutype, var);
2768 if (!constant_multiple_of (ustep, cstep, &rat))
2769 return false;
2771 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2772 type, we achieve better folding by computing their difference in this
2773 wider type, and cast the result to UUTYPE. We do not need to worry about
2774 overflows, as all the arithmetics will in the end be performed in UUTYPE
2775 anyway. */
2776 common_type = determine_common_wider_type (&ubase, &cbase);
2778 /* use = ubase - ratio * cbase + ratio * var. */
2779 tree_to_aff_combination (ubase, common_type, aff);
2780 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2781 tree_to_aff_combination (var, uutype, &var_aff);
2783 /* We need to shift the value if we are after the increment. */
2784 if (stmt_after_increment (loop, cand, at))
2786 aff_tree cstep_aff;
2788 if (common_type != uutype)
2789 cstep_common = fold_convert (common_type, cstep);
2790 else
2791 cstep_common = cstep;
2793 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2794 aff_combination_add (&cbase_aff, &cstep_aff);
2797 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2798 aff_combination_add (aff, &cbase_aff);
2799 if (common_type != uutype)
2800 aff_combination_convert (aff, uutype);
2802 aff_combination_scale (&var_aff, rat);
2803 aff_combination_add (aff, &var_aff);
2805 return true;
2808 /* Determines the expression by that USE is expressed from induction variable
2809 CAND at statement AT in LOOP. The computation is unshared. */
2811 static tree
2812 get_computation_at (struct loop *loop,
2813 struct iv_use *use, struct iv_cand *cand, gimple at)
2815 aff_tree aff;
2816 tree type = TREE_TYPE (use->iv->base);
2818 if (!get_computation_aff (loop, use, cand, at, &aff))
2819 return NULL_TREE;
2820 unshare_aff_combination (&aff);
2821 return fold_convert (type, aff_combination_to_tree (&aff));
2824 /* Determines the expression by that USE is expressed from induction variable
2825 CAND in LOOP. The computation is unshared. */
2827 static tree
2828 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2830 return get_computation_at (loop, use, cand, use->stmt);
2833 /* Returns cost of addition in MODE. */
2835 static unsigned
2836 add_cost (enum machine_mode mode)
2838 static unsigned costs[NUM_MACHINE_MODES];
2839 rtx seq;
2840 unsigned cost;
2842 if (costs[mode])
2843 return costs[mode];
2845 start_sequence ();
2846 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2847 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2848 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2849 NULL_RTX);
2850 seq = get_insns ();
2851 end_sequence ();
2853 cost = seq_cost (seq);
2854 if (!cost)
2855 cost = 1;
2857 costs[mode] = cost;
2859 if (dump_file && (dump_flags & TDF_DETAILS))
2860 fprintf (dump_file, "Addition in %s costs %d\n",
2861 GET_MODE_NAME (mode), cost);
2862 return cost;
2865 /* Entry in a hashtable of already known costs for multiplication. */
2866 struct mbc_entry
2868 HOST_WIDE_INT cst; /* The constant to multiply by. */
2869 enum machine_mode mode; /* In mode. */
2870 unsigned cost; /* The cost. */
2873 /* Counts hash value for the ENTRY. */
2875 static hashval_t
2876 mbc_entry_hash (const void *entry)
2878 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2880 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2883 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2885 static int
2886 mbc_entry_eq (const void *entry1, const void *entry2)
2888 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2889 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2891 return (e1->mode == e2->mode
2892 && e1->cst == e2->cst);
2895 /* Returns cost of multiplication by constant CST in MODE. */
2897 unsigned
2898 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2900 static htab_t costs;
2901 struct mbc_entry **cached, act;
2902 rtx seq;
2903 unsigned cost;
2905 if (!costs)
2906 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2908 act.mode = mode;
2909 act.cst = cst;
2910 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2911 if (*cached)
2912 return (*cached)->cost;
2914 *cached = XNEW (struct mbc_entry);
2915 (*cached)->mode = mode;
2916 (*cached)->cst = cst;
2918 start_sequence ();
2919 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2920 gen_int_mode (cst, mode), NULL_RTX, 0);
2921 seq = get_insns ();
2922 end_sequence ();
2924 cost = seq_cost (seq);
2926 if (dump_file && (dump_flags & TDF_DETAILS))
2927 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2928 (int) cst, GET_MODE_NAME (mode), cost);
2930 (*cached)->cost = cost;
2932 return cost;
2935 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2936 validity for a memory reference accessing memory of mode MODE. */
2938 bool
2939 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2941 #define MAX_RATIO 128
2942 static sbitmap valid_mult[MAX_MACHINE_MODE];
2944 if (!valid_mult[mode])
2946 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2947 rtx addr;
2948 HOST_WIDE_INT i;
2950 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2951 sbitmap_zero (valid_mult[mode]);
2952 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2953 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2955 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2956 if (memory_address_p (mode, addr))
2957 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2960 if (dump_file && (dump_flags & TDF_DETAILS))
2962 fprintf (dump_file, " allowed multipliers:");
2963 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2964 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2965 fprintf (dump_file, " %d", (int) i);
2966 fprintf (dump_file, "\n");
2967 fprintf (dump_file, "\n");
2971 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2972 return false;
2974 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2977 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2978 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2979 variable is omitted. Compute the cost for a memory reference that accesses
2980 a memory location of mode MEM_MODE.
2982 TODO -- there must be some better way. This all is quite crude. */
2984 static comp_cost
2985 get_address_cost (bool symbol_present, bool var_present,
2986 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2987 enum machine_mode mem_mode)
2989 static bool initialized[MAX_MACHINE_MODE];
2990 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2991 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2992 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2993 unsigned cost, acost, complexity;
2994 bool offset_p, ratio_p;
2995 HOST_WIDE_INT s_offset;
2996 unsigned HOST_WIDE_INT mask;
2997 unsigned bits;
2999 if (!initialized[mem_mode])
3001 HOST_WIDE_INT i;
3002 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3003 int old_cse_not_expected;
3004 unsigned sym_p, var_p, off_p, rat_p, add_c;
3005 rtx seq, addr, base;
3006 rtx reg0, reg1;
3008 initialized[mem_mode] = true;
3010 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3012 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3013 for (i = start; i <= 1 << 20; i <<= 1)
3015 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3016 if (!memory_address_p (mem_mode, addr))
3017 break;
3019 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3020 off[mem_mode] = max_offset[mem_mode];
3022 for (i = start; i <= 1 << 20; i <<= 1)
3024 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3025 if (!memory_address_p (mem_mode, addr))
3026 break;
3028 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3030 if (dump_file && (dump_flags & TDF_DETAILS))
3032 fprintf (dump_file, "get_address_cost:\n");
3033 fprintf (dump_file, " min offset %s %d\n",
3034 GET_MODE_NAME (mem_mode),
3035 (int) min_offset[mem_mode]);
3036 fprintf (dump_file, " max offset %s %d\n",
3037 GET_MODE_NAME (mem_mode),
3038 (int) max_offset[mem_mode]);
3041 rat[mem_mode] = 1;
3042 for (i = 2; i <= MAX_RATIO; i++)
3043 if (multiplier_allowed_in_address_p (i, mem_mode))
3045 rat[mem_mode] = i;
3046 break;
3049 /* Compute the cost of various addressing modes. */
3050 acost = 0;
3051 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3052 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3054 for (i = 0; i < 16; i++)
3056 sym_p = i & 1;
3057 var_p = (i >> 1) & 1;
3058 off_p = (i >> 2) & 1;
3059 rat_p = (i >> 3) & 1;
3061 addr = reg0;
3062 if (rat_p)
3063 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3064 gen_int_mode (rat[mem_mode], Pmode));
3066 if (var_p)
3067 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3069 if (sym_p)
3071 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3072 /* ??? We can run into trouble with some backends by presenting
3073 it with symbols which haven't been properly passed through
3074 targetm.encode_section_info. By setting the local bit, we
3075 enhance the probability of things working. */
3076 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3078 if (off_p)
3079 base = gen_rtx_fmt_e (CONST, Pmode,
3080 gen_rtx_fmt_ee (PLUS, Pmode,
3081 base,
3082 gen_int_mode (off[mem_mode],
3083 Pmode)));
3085 else if (off_p)
3086 base = gen_int_mode (off[mem_mode], Pmode);
3087 else
3088 base = NULL_RTX;
3090 if (base)
3091 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3093 start_sequence ();
3094 /* To avoid splitting addressing modes, pretend that no cse will
3095 follow. */
3096 old_cse_not_expected = cse_not_expected;
3097 cse_not_expected = true;
3098 addr = memory_address (mem_mode, addr);
3099 cse_not_expected = old_cse_not_expected;
3100 seq = get_insns ();
3101 end_sequence ();
3103 acost = seq_cost (seq);
3104 acost += address_cost (addr, mem_mode);
3106 if (!acost)
3107 acost = 1;
3108 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3111 /* On some targets, it is quite expensive to load symbol to a register,
3112 which makes addresses that contain symbols look much more expensive.
3113 However, the symbol will have to be loaded in any case before the
3114 loop (and quite likely we have it in register already), so it does not
3115 make much sense to penalize them too heavily. So make some final
3116 tweaks for the SYMBOL_PRESENT modes:
3118 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3119 var is cheaper, use this mode with small penalty.
3120 If VAR_PRESENT is true, try whether the mode with
3121 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3122 if this is the case, use it. */
3123 add_c = add_cost (Pmode);
3124 for (i = 0; i < 8; i++)
3126 var_p = i & 1;
3127 off_p = (i >> 1) & 1;
3128 rat_p = (i >> 2) & 1;
3130 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3131 if (var_p)
3132 acost += add_c;
3134 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3135 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3138 if (dump_file && (dump_flags & TDF_DETAILS))
3140 fprintf (dump_file, "Address costs:\n");
3142 for (i = 0; i < 16; i++)
3144 sym_p = i & 1;
3145 var_p = (i >> 1) & 1;
3146 off_p = (i >> 2) & 1;
3147 rat_p = (i >> 3) & 1;
3149 fprintf (dump_file, " ");
3150 if (sym_p)
3151 fprintf (dump_file, "sym + ");
3152 if (var_p)
3153 fprintf (dump_file, "var + ");
3154 if (off_p)
3155 fprintf (dump_file, "cst + ");
3156 if (rat_p)
3157 fprintf (dump_file, "rat * ");
3159 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3160 fprintf (dump_file, "index costs %d\n", acost);
3162 fprintf (dump_file, "\n");
3166 bits = GET_MODE_BITSIZE (Pmode);
3167 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3168 offset &= mask;
3169 if ((offset >> (bits - 1) & 1))
3170 offset |= ~mask;
3171 s_offset = offset;
3173 cost = 0;
3174 offset_p = (s_offset != 0
3175 && min_offset[mem_mode] <= s_offset
3176 && s_offset <= max_offset[mem_mode]);
3177 ratio_p = (ratio != 1
3178 && multiplier_allowed_in_address_p (ratio, mem_mode));
3180 if (ratio != 1 && !ratio_p)
3181 cost += multiply_by_cost (ratio, Pmode);
3183 if (s_offset && !offset_p && !symbol_present)
3184 cost += add_cost (Pmode);
3186 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3187 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3188 return new_cost (cost + acost, complexity);
3191 /* Estimates cost of forcing expression EXPR into a variable. */
3193 static comp_cost
3194 force_expr_to_var_cost (tree expr)
3196 static bool costs_initialized = false;
3197 static unsigned integer_cost;
3198 static unsigned symbol_cost;
3199 static unsigned address_cost;
3200 tree op0, op1;
3201 comp_cost cost0, cost1, cost;
3202 enum machine_mode mode;
3204 if (!costs_initialized)
3206 tree type = build_pointer_type (integer_type_node);
3207 tree var, addr;
3208 rtx x;
3210 var = create_tmp_var_raw (integer_type_node, "test_var");
3211 TREE_STATIC (var) = 1;
3212 x = produce_memory_decl_rtl (var, NULL);
3213 SET_DECL_RTL (var, x);
3215 integer_cost = computation_cost (build_int_cst (integer_type_node,
3216 2000));
3218 addr = build1 (ADDR_EXPR, type, var);
3219 symbol_cost = computation_cost (addr) + 1;
3221 address_cost
3222 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3223 addr,
3224 build_int_cst (sizetype, 2000))) + 1;
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3227 fprintf (dump_file, "force_expr_to_var_cost:\n");
3228 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3229 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3230 fprintf (dump_file, " address %d\n", (int) address_cost);
3231 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3232 fprintf (dump_file, "\n");
3235 costs_initialized = true;
3238 STRIP_NOPS (expr);
3240 if (SSA_VAR_P (expr))
3241 return zero_cost;
3243 if (is_gimple_min_invariant (expr))
3245 if (TREE_CODE (expr) == INTEGER_CST)
3246 return new_cost (integer_cost, 0);
3248 if (TREE_CODE (expr) == ADDR_EXPR)
3250 tree obj = TREE_OPERAND (expr, 0);
3252 if (TREE_CODE (obj) == VAR_DECL
3253 || TREE_CODE (obj) == PARM_DECL
3254 || TREE_CODE (obj) == RESULT_DECL)
3255 return new_cost (symbol_cost, 0);
3258 return new_cost (address_cost, 0);
3261 switch (TREE_CODE (expr))
3263 case POINTER_PLUS_EXPR:
3264 case PLUS_EXPR:
3265 case MINUS_EXPR:
3266 case MULT_EXPR:
3267 op0 = TREE_OPERAND (expr, 0);
3268 op1 = TREE_OPERAND (expr, 1);
3269 STRIP_NOPS (op0);
3270 STRIP_NOPS (op1);
3272 if (is_gimple_val (op0))
3273 cost0 = zero_cost;
3274 else
3275 cost0 = force_expr_to_var_cost (op0);
3277 if (is_gimple_val (op1))
3278 cost1 = zero_cost;
3279 else
3280 cost1 = force_expr_to_var_cost (op1);
3282 break;
3284 default:
3285 /* Just an arbitrary value, FIXME. */
3286 return new_cost (target_spill_cost, 0);
3289 mode = TYPE_MODE (TREE_TYPE (expr));
3290 switch (TREE_CODE (expr))
3292 case POINTER_PLUS_EXPR:
3293 case PLUS_EXPR:
3294 case MINUS_EXPR:
3295 cost = new_cost (add_cost (mode), 0);
3296 break;
3298 case MULT_EXPR:
3299 if (cst_and_fits_in_hwi (op0))
3300 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3301 else if (cst_and_fits_in_hwi (op1))
3302 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3303 else
3304 return new_cost (target_spill_cost, 0);
3305 break;
3307 default:
3308 gcc_unreachable ();
3311 cost = add_costs (cost, cost0);
3312 cost = add_costs (cost, cost1);
3314 /* Bound the cost by target_spill_cost. The parts of complicated
3315 computations often are either loop invariant or at least can
3316 be shared between several iv uses, so letting this grow without
3317 limits would not give reasonable results. */
3318 if (cost.cost > target_spill_cost)
3319 cost.cost = target_spill_cost;
3321 return cost;
3324 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3325 invariants the computation depends on. */
3327 static comp_cost
3328 force_var_cost (struct ivopts_data *data,
3329 tree expr, bitmap *depends_on)
3331 if (depends_on)
3333 fd_ivopts_data = data;
3334 walk_tree (&expr, find_depends, depends_on, NULL);
3337 return force_expr_to_var_cost (expr);
3340 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3341 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3342 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3343 invariants the computation depends on. */
3345 static comp_cost
3346 split_address_cost (struct ivopts_data *data,
3347 tree addr, bool *symbol_present, bool *var_present,
3348 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3350 tree core;
3351 HOST_WIDE_INT bitsize;
3352 HOST_WIDE_INT bitpos;
3353 tree toffset;
3354 enum machine_mode mode;
3355 int unsignedp, volatilep;
3357 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3358 &unsignedp, &volatilep, false);
3360 if (toffset != 0
3361 || bitpos % BITS_PER_UNIT != 0
3362 || TREE_CODE (core) != VAR_DECL)
3364 *symbol_present = false;
3365 *var_present = true;
3366 fd_ivopts_data = data;
3367 walk_tree (&addr, find_depends, depends_on, NULL);
3368 return new_cost (target_spill_cost, 0);
3371 *offset += bitpos / BITS_PER_UNIT;
3372 if (TREE_STATIC (core)
3373 || DECL_EXTERNAL (core))
3375 *symbol_present = true;
3376 *var_present = false;
3377 return zero_cost;
3380 *symbol_present = false;
3381 *var_present = true;
3382 return zero_cost;
3385 /* Estimates cost of expressing difference of addresses E1 - E2 as
3386 var + symbol + offset. The value of offset is added to OFFSET,
3387 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3388 part is missing. DEPENDS_ON is a set of the invariants the computation
3389 depends on. */
3391 static comp_cost
3392 ptr_difference_cost (struct ivopts_data *data,
3393 tree e1, tree e2, bool *symbol_present, bool *var_present,
3394 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3396 HOST_WIDE_INT diff = 0;
3397 comp_cost cost;
3399 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3401 if (ptr_difference_const (e1, e2, &diff))
3403 *offset += diff;
3404 *symbol_present = false;
3405 *var_present = false;
3406 return zero_cost;
3409 if (integer_zerop (e2))
3410 return split_address_cost (data, TREE_OPERAND (e1, 0),
3411 symbol_present, var_present, offset, depends_on);
3413 *symbol_present = false;
3414 *var_present = true;
3416 cost = force_var_cost (data, e1, depends_on);
3417 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3418 cost.cost += add_cost (Pmode);
3420 return cost;
3423 /* Estimates cost of expressing difference E1 - E2 as
3424 var + symbol + offset. The value of offset is added to OFFSET,
3425 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3426 part is missing. DEPENDS_ON is a set of the invariants the computation
3427 depends on. */
3429 static comp_cost
3430 difference_cost (struct ivopts_data *data,
3431 tree e1, tree e2, bool *symbol_present, bool *var_present,
3432 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3434 comp_cost cost;
3435 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3436 unsigned HOST_WIDE_INT off1, off2;
3438 e1 = strip_offset (e1, &off1);
3439 e2 = strip_offset (e2, &off2);
3440 *offset += off1 - off2;
3442 STRIP_NOPS (e1);
3443 STRIP_NOPS (e2);
3445 if (TREE_CODE (e1) == ADDR_EXPR)
3446 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3447 depends_on);
3448 *symbol_present = false;
3450 if (operand_equal_p (e1, e2, 0))
3452 *var_present = false;
3453 return zero_cost;
3455 *var_present = true;
3456 if (integer_zerop (e2))
3457 return force_var_cost (data, e1, depends_on);
3459 if (integer_zerop (e1))
3461 cost = force_var_cost (data, e2, depends_on);
3462 cost.cost += multiply_by_cost (-1, mode);
3464 return cost;
3467 cost = force_var_cost (data, e1, depends_on);
3468 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3469 cost.cost += add_cost (mode);
3471 return cost;
3474 /* Determines the cost of the computation by that USE is expressed
3475 from induction variable CAND. If ADDRESS_P is true, we just need
3476 to create an address from it, otherwise we want to get it into
3477 register. A set of invariants we depend on is stored in
3478 DEPENDS_ON. AT is the statement at that the value is computed. */
3480 static comp_cost
3481 get_computation_cost_at (struct ivopts_data *data,
3482 struct iv_use *use, struct iv_cand *cand,
3483 bool address_p, bitmap *depends_on, gimple at)
3485 tree ubase = use->iv->base, ustep = use->iv->step;
3486 tree cbase, cstep;
3487 tree utype = TREE_TYPE (ubase), ctype;
3488 unsigned HOST_WIDE_INT cstepi, offset = 0;
3489 HOST_WIDE_INT ratio, aratio;
3490 bool var_present, symbol_present;
3491 comp_cost cost;
3492 unsigned n_sums;
3493 double_int rat;
3495 *depends_on = NULL;
3497 /* Only consider real candidates. */
3498 if (!cand->iv)
3499 return infinite_cost;
3501 cbase = cand->iv->base;
3502 cstep = cand->iv->step;
3503 ctype = TREE_TYPE (cbase);
3505 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3507 /* We do not have a precision to express the values of use. */
3508 return infinite_cost;
3511 if (address_p)
3513 /* Do not try to express address of an object with computation based
3514 on address of a different object. This may cause problems in rtl
3515 level alias analysis (that does not expect this to be happening,
3516 as this is illegal in C), and would be unlikely to be useful
3517 anyway. */
3518 if (use->iv->base_object
3519 && cand->iv->base_object
3520 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3521 return infinite_cost;
3524 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3526 /* TODO -- add direct handling of this case. */
3527 goto fallback;
3530 /* CSTEPI is removed from the offset in case statement is after the
3531 increment. If the step is not constant, we use zero instead.
3532 This is a bit imprecise (there is the extra addition), but
3533 redundancy elimination is likely to transform the code so that
3534 it uses value of the variable before increment anyway,
3535 so it is not that much unrealistic. */
3536 if (cst_and_fits_in_hwi (cstep))
3537 cstepi = int_cst_value (cstep);
3538 else
3539 cstepi = 0;
3541 if (!constant_multiple_of (ustep, cstep, &rat))
3542 return infinite_cost;
3544 if (double_int_fits_in_shwi_p (rat))
3545 ratio = double_int_to_shwi (rat);
3546 else
3547 return infinite_cost;
3549 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3550 or ratio == 1, it is better to handle this like
3552 ubase - ratio * cbase + ratio * var
3554 (also holds in the case ratio == -1, TODO. */
3556 if (cst_and_fits_in_hwi (cbase))
3558 offset = - ratio * int_cst_value (cbase);
3559 cost = difference_cost (data,
3560 ubase, build_int_cst (utype, 0),
3561 &symbol_present, &var_present, &offset,
3562 depends_on);
3564 else if (ratio == 1)
3566 cost = difference_cost (data,
3567 ubase, cbase,
3568 &symbol_present, &var_present, &offset,
3569 depends_on);
3571 else
3573 cost = force_var_cost (data, cbase, depends_on);
3574 cost.cost += add_cost (TYPE_MODE (ctype));
3575 cost = add_costs (cost,
3576 difference_cost (data,
3577 ubase, build_int_cst (utype, 0),
3578 &symbol_present, &var_present,
3579 &offset, depends_on));
3582 /* If we are after the increment, the value of the candidate is higher by
3583 one iteration. */
3584 if (stmt_after_increment (data->current_loop, cand, at))
3585 offset -= ratio * cstepi;
3587 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3588 (symbol/var/const parts may be omitted). If we are looking for an address,
3589 find the cost of addressing this. */
3590 if (address_p)
3591 return add_costs (cost, get_address_cost (symbol_present, var_present,
3592 offset, ratio,
3593 TYPE_MODE (TREE_TYPE (*use->op_p))));
3595 /* Otherwise estimate the costs for computing the expression. */
3596 aratio = ratio > 0 ? ratio : -ratio;
3597 if (!symbol_present && !var_present && !offset)
3599 if (ratio != 1)
3600 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3602 return cost;
3605 if (aratio != 1)
3606 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3608 n_sums = 1;
3609 if (var_present
3610 /* Symbol + offset should be compile-time computable. */
3611 && (symbol_present || offset))
3612 n_sums++;
3614 /* Having offset does not affect runtime cost in case it is added to
3615 symbol, but it increases complexity. */
3616 if (offset)
3617 cost.complexity++;
3619 cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3620 return cost;
3622 fallback:
3624 /* Just get the expression, expand it and measure the cost. */
3625 tree comp = get_computation_at (data->current_loop, use, cand, at);
3627 if (!comp)
3628 return infinite_cost;
3630 if (address_p)
3631 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3633 return new_cost (computation_cost (comp), 0);
3637 /* Determines the cost of the computation by that USE is expressed
3638 from induction variable CAND. If ADDRESS_P is true, we just need
3639 to create an address from it, otherwise we want to get it into
3640 register. A set of invariants we depend on is stored in
3641 DEPENDS_ON. */
3643 static comp_cost
3644 get_computation_cost (struct ivopts_data *data,
3645 struct iv_use *use, struct iv_cand *cand,
3646 bool address_p, bitmap *depends_on)
3648 return get_computation_cost_at (data,
3649 use, cand, address_p, depends_on, use->stmt);
3652 /* Determines cost of basing replacement of USE on CAND in a generic
3653 expression. */
3655 static bool
3656 determine_use_iv_cost_generic (struct ivopts_data *data,
3657 struct iv_use *use, struct iv_cand *cand)
3659 bitmap depends_on;
3660 comp_cost cost;
3662 /* The simple case first -- if we need to express value of the preserved
3663 original biv, the cost is 0. This also prevents us from counting the
3664 cost of increment twice -- once at this use and once in the cost of
3665 the candidate. */
3666 if (cand->pos == IP_ORIGINAL
3667 && cand->incremented_at == use->stmt)
3669 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3670 return true;
3673 cost = get_computation_cost (data, use, cand, false, &depends_on);
3674 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3676 return !infinite_cost_p (cost);
3679 /* Determines cost of basing replacement of USE on CAND in an address. */
3681 static bool
3682 determine_use_iv_cost_address (struct ivopts_data *data,
3683 struct iv_use *use, struct iv_cand *cand)
3685 bitmap depends_on;
3686 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3688 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3690 return !infinite_cost_p (cost);
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  The value is BASE + NITER * STEP, plus one extra STEP
   if AT executes after the candidate's increment.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  /* Pointer arithmetic is performed in sizetype.  */
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  /* DELTA = NITER * STEP.  */
  aff_combination_mult (&nit, &step, &delta);
  /* If AT runs after the increment, one more step has been taken.  */
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
3718 /* Returns period of induction variable iv. */
3720 static tree
3721 iv_period (struct iv *iv)
3723 tree step = iv->step, period, type;
3724 tree pow2div;
3726 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3728 /* Period of the iv is gcd (step, type range). Since type range is power
3729 of two, it suffices to determine the maximum power of two that divides
3730 step. */
3731 pow2div = num_ending_zeros (step);
3732 type = unsigned_type_for (TREE_TYPE (step));
3734 period = build_low_bits_mask (type,
3735 (TYPE_PRECISION (type)
3736 - tree_low_cst (pow2div, 1)));
3738 return period;
3741 /* Returns the comparison operator used when eliminating the iv USE. */
3743 static enum tree_code
3744 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3746 struct loop *loop = data->current_loop;
3747 basic_block ex_bb;
3748 edge exit;
3750 ex_bb = gimple_bb (use->stmt);
3751 exit = EDGE_SUCC (ex_bb, 0);
3752 if (flow_bb_inside_loop_p (loop, exit->dest))
3753 exit = EDGE_SUCC (ex_bb, 1);
3755 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;

  /* Only constant-step candidates have a statically computable period.  */
  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  /* Find the successor edge that leaves the loop; both leaving is
     impossible here, neither leaving disqualifies the use.  */
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_lt (nit, period))
	return false;
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else if (loop_only_exit_p (loop, exit))
    {
      double_int period_value, max_niter;
      if (!estimated_loop_iterations (loop, true, &max_niter))
	return false;
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) >= 0)
	return false;
    }

  /* Otherwise, punt.  */
  else
    return false;

  /* The candidate's value after NIT iterations is the bound the exit
     test must compare against.  */
  cand_value_at (loop, cand, use->stmt, nit, &bnd);
  *bound = aff_combination_to_tree (&bnd);
  return true;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
  gcc_assert (ok);

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach.  The winner's dependence bitmap is handed
     to set_use_iv_cost (cleared to NULL here so it is not freed below).  */
  if (compare_costs (elim_cost, express_cost) < 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      /* Expressing keeps the comparison, so no replacement bound.  */
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound);

  /* Free the losing alternative's dependence bitmap.  */
  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
3891 /* Determines cost of basing replacement of USE on CAND. Returns false
3892 if USE cannot be based on CAND. */
3894 static bool
3895 determine_use_iv_cost (struct ivopts_data *data,
3896 struct iv_use *use, struct iv_cand *cand)
3898 switch (use->type)
3900 case USE_NONLINEAR_EXPR:
3901 return determine_use_iv_cost_generic (data, use, cand);
3903 case USE_ADDRESS:
3904 return determine_use_iv_cost_address (data, use, cand);
3906 case USE_COMPARE:
3907 return determine_use_iv_cost_condition (data, use, cand);
3909 default:
3910 gcc_unreachable ();
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Every candidate is tried against every use.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Only the candidates recorded as related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip empty or hopeless map entries.  */
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, " %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
3990 /* Determines cost of the candidate CAND. */
3992 static void
3993 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3995 comp_cost cost_base;
3996 unsigned cost, cost_step;
3997 tree base;
3999 if (!cand->iv)
4001 cand->cost = 0;
4002 return;
4005 /* There are two costs associated with the candidate -- its increment
4006 and its initialization. The second is almost negligible for any loop
4007 that rolls enough, so we take it just very little into account. */
4009 base = cand->iv->base;
4010 cost_base = force_var_cost (data, base, NULL);
4011 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4013 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4015 /* Prefer the original ivs unless we may gain something by replacing it.
4016 The reason is to make debugging simpler; so this is not relevant for
4017 artificial ivs created by other optimization passes. */
4018 if (cand->pos != IP_ORIGINAL
4019 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4020 cost++;
4022 /* Prefer not to insert statements into latch unless there are some
4023 already (so that we do not create unnecessary jumps). */
4024 if (cand->pos == IP_END
4025 && empty_block_p (ip_end_pos (data->current_loop)))
4026 cost++;
4028 cand->cost = cost;
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, " cand\tcost\n");
    }

  /* Compute (and optionally dump) the cost of each candidate.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
4058 /* Calculates cost for having SIZE induction variables. */
4060 static unsigned
4061 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4063 /* We add size to the cost, so that we prefer eliminating ivs
4064 if possible. */
4065 return size + estimate_reg_pressure_cost (size, data->regs_used);
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
      fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
    }

  /* Count registers used by the loop: phi results that are neither
     virtual operands nor recognized induction variables.  */
  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      /* Bivs are accounted for separately, as induction variables.  */
      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Add invariants that appear in nonlinear uses; each occupies a
     register for the whole loop.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " cost for size:\n");
      fprintf (dump_file, " ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, " %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
4146 /* Returns true if A is a cheaper cost pair than B. */
4148 static bool
4149 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4151 int cmp;
4153 if (!a)
4154 return false;
4156 if (!b)
4157 return true;
4159 cmp = compare_costs (a->cost, b->cost);
4160 if (cmp < 0)
4161 return true;
4163 if (cmp > 0)
4164 return false;
4166 /* In case the costs are the same, prefer the cheaper candidate. */
4167 if (a->cand->cost < b->cand->cost)
4168 return true;
4170 return false;
4173 /* Computes the cost field of IVS structure. */
4175 static void
4176 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4178 comp_cost cost = ivs->cand_use_cost;
4179 cost.cost += ivs->cand_cost;
4180 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4182 ivs->cost = cost;
4185 /* Remove invariants in set INVS to set IVS. */
4187 static void
4188 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4190 bitmap_iterator bi;
4191 unsigned iid;
4193 if (!invs)
4194 return;
4196 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4198 ivs->n_invariant_uses[iid]--;
4199 if (ivs->n_invariant_uses[iid] == 0)
4200 ivs->n_regs--;
/* Set USE not to be expressed by any candidate in IVS.  Reverses all the
   bookkeeping done when the use was assigned its current cost pair.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  /* Nothing to undo if the use was not expressed.  */
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The use becomes unexpressed.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the last use of the candidate, remove the candidate
     from the set entirely.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
4240 /* Add invariants in set INVS to set IVS. */
4242 static void
4243 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4245 bitmap_iterator bi;
4246 unsigned iid;
4248 if (!invs)
4249 return;
4251 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4253 ivs->n_invariant_uses[iid]++;
4254 if (ivs->n_invariant_uses[iid] == 1)
4255 ivs->n_regs++;
/* Set cost pair for USE in set IVS to CP.  Any previous assignment is
   undone first; passing CP == NULL just unassigns the use.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  /* Nothing to do if the assignment does not change.  */
  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Undo the bookkeeping of the previous assignment, if any.  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of the candidate adds it to the set.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  /* Uses must be added in order of their ids.  */
  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      /* A newly covered use starts out unexpressed.  */
      ivs->upto++;
      ivs->bad_uses++;
    }

  /* Pick the cheapest way of expressing USE by a candidate already
     present in the set.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
4328 /* Get cost for assignment IVS. */
4330 static comp_cost
4331 iv_ca_cost (struct iv_ca *ivs)
4333 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4336 /* Returns true if all dependences of CP are among invariants in IVS. */
4338 static bool
4339 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4341 unsigned i;
4342 bitmap_iterator bi;
4344 if (!cp->depends_on)
4345 return true;
4347 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4349 if (ivs->n_invariant_uses[i] == 0)
4350 return false;
4353 return true;
4356 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4357 it before NEXT_CHANGE. */
4359 static struct iv_ca_delta *
4360 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4361 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4363 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4365 change->use = use;
4366 change->old_cp = old_cp;
4367 change->new_cp = new_cp;
4368 change->next_change = next_change;
4370 return change;
4373 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4374 are rewritten. */
4376 static struct iv_ca_delta *
4377 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4379 struct iv_ca_delta *last;
4381 if (!l2)
4382 return l1;
4384 if (!l1)
4385 return l2;
4387 for (last = l1; last->next_change; last = last->next_change)
4388 continue;
4389 last->next_change = l2;
4391 return l1;
4394 /* Returns candidate by that USE is expressed in IVS. */
4396 static struct cost_pair *
4397 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4399 return ivs->cand_for_use[use->id];
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  /* Standard in-place list reversal ...  */
  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      /* ... additionally swapping old/new in each entry, so that applying
	 the reversed list undoes the original changes.  */
      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  /* To revert, walk the inverse list.  */
  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      /* Each change must start from the state it recorded.  */
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  /* Restore DELTA to its original orientation for the caller.  */
  if (!forward)
    iv_ca_delta_reverse (delta);
}
4449 /* Returns true if CAND is used in IVS. */
4451 static bool
4452 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4454 return ivs->n_cand_uses[cand->id] > 0;
4457 /* Returns number of induction variable candidates in the set IVS. */
4459 static unsigned
4460 iv_ca_n_cands (struct iv_ca *ivs)
4462 return ivs->n_cands;
4465 /* Free the list of changes DELTA. */
4467 static void
4468 iv_ca_delta_free (struct iv_ca_delta **delta)
4470 struct iv_ca_delta *act, *next;
4472 for (act = *delta; act; act = next)
4474 next = act->next_change;
4475 free (act);
4478 *delta = NULL;
4481 /* Allocates new iv candidates assignment. */
4483 static struct iv_ca *
4484 iv_ca_new (struct ivopts_data *data)
4486 struct iv_ca *nw = XNEW (struct iv_ca);
4488 nw->upto = 0;
4489 nw->bad_uses = 0;
4490 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4491 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4492 nw->cands = BITMAP_ALLOC (NULL);
4493 nw->n_cands = 0;
4494 nw->n_regs = 0;
4495 nw->cand_use_cost = zero_cost;
4496 nw->cand_cost = 0;
4497 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4498 nw->cost = zero_cost;
4500 return nw;
4503 /* Free memory occupied by the set IVS. */
4505 static void
4506 iv_ca_free (struct iv_ca **ivs)
4508 free ((*ivs)->cand_for_use);
4509 free ((*ivs)->n_cand_uses);
4510 BITMAP_FREE ((*ivs)->cands);
4511 free ((*ivs)->n_invariant_uses);
4512 free (*ivs);
4513 *ivs = NULL;
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = " invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
  bitmap_print (file, ivs->cands, " candidates ","\n");

  /* Print the used invariants as a comma-separated list; PREF carries
     the header on the first item only.  */
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already expressed by CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* Cannot switch unless all dependences are satisfied.  */
      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      /* Only switch if CAND is actually cheaper for this use.  */
      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the changes temporarily to measure the cost, then revert;
     the caller decides whether to commit DELTA.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      /* Only uses currently expressed by CAND need a replacement.  */
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  /* Search the whole remaining set for the cheapest replacement.  */
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  /* Only candidates related to the use and present in the set.  */
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No replacement found -- CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then revert the changes.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Try removing each candidate in turn; keep the removal that gives
     the biggest improvement.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal helps -- the set is already minimal.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  Returns false if USE cannot be expressed
   by any candidate at all.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* First let the set pick the cheapest of its current candidates.  */
  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      /* Remember that choice as a delta, then unassign the use so the
	 alternatives below start from a clean state.  */
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Tentatively express USE by CAND, measure how the whole set fares
	 with CAND added, then revert.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* Fall back to all remaining candidates only if nothing worked.  */
  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important && cand->iv->base_object == NULL_TREE)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Commit the winning alternative for good.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
4821 /* Finds an initial assignment of candidates to uses. */
4823 static struct iv_ca *
4824 get_initial_solution (struct ivopts_data *data)
4826 struct iv_ca *ivs = iv_ca_new (data);
4827 unsigned i;
4829 for (i = 0; i < n_iv_uses (data); i++)
4830 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4832 iv_ca_free (&ivs);
4833 return NULL;
4836 return ivs;
/* Tries to improve set of induction variables IVS.  Returns true if the
   set was changed (and improved), false when no improvement was found.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      /* CAND did not win any use -- nothing to evaluate.  */
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Commit the best change; its measured cost must match the result.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Greedily improve until a local optimum is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
    }

  /* Record the selected candidate on each use for the rewrite phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
4947 /* Creates a new induction variable corresponding to CAND.  */
4949 static void
4950 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4952   gimple_stmt_iterator incr_pos;
4953   tree base;
4954   bool after = false;
     /* Candidates without an associated iv need no code.  NOTE(review):
	inferred from the NULL check alone -- confirm which candidates
	have a NULL iv.  */
4956   if (!cand->iv)
4957     return;
     /* Decide where the increment statement of the new iv is emitted.  */
4959   switch (cand->pos)
4961     case IP_NORMAL:
4962       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
4963       break;
4965     case IP_END:
     /* AFTER = true: insert after the last statement of that block.  */
4966       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
4967       after = true;
4968       break;
4970     case IP_ORIGINAL:
4971       /* Mark that the iv is preserved.  */
4972       name_info (data, cand->var_before)->preserve_biv = true;
4973       name_info (data, cand->var_after)->preserve_biv = true;
4975       /* Rewrite the increment so that it uses var_before directly.  */
4976       find_interesting_uses_op (data, cand->var_after)->selected = cand;
     /* The original biv is reused as-is; no new code is emitted.  */
4978       return;
4981   gimple_add_tmp_var (cand->var_before);
4982   add_referenced_var (cand->var_before);
4984   base = unshare_expr (cand->iv->base);
     /* create_iv receives the addresses of var_before/var_after --
	presumably it fills in the ssa names before and after the
	increment; confirm in its definition.  */
4986   create_iv (base, unshare_expr (cand->iv->step),
4987 	     cand->var_before, data->current_loop,
4988 	     &incr_pos, after, &cand->var_before, &cand->var_after);
4991 /* Creates new induction variables described in SET. */
4993 static void
4994 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4996 unsigned i;
4997 struct iv_cand *cand;
4998 bitmap_iterator bi;
5000 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5002 cand = iv_cand (data, i);
5003 create_new_iv (data, cand);
5007 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5008    is true, remove also the ssa name defined by the statement.  */
5010 static void
5011 remove_statement (gimple stmt, bool including_defined_name)
5013   gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5015   if (gimple_code (stmt) == GIMPLE_PHI)
5016     remove_phi_node (&bsi, including_defined_name);
5017   else
5019       gsi_remove (&bsi, true);
     /* For a non-phi statement the defs are always released;
	INCLUDING_DEFINED_NAME only affects the phi path above.  */
5020       release_defs (stmt);
5024 /* Rewrites USE (definition of iv used in a nonlinear expression)
5025    using candidate CAND.  */
5027 static void
5028 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5029 			    struct iv_use *use, struct iv_cand *cand)
5031   tree comp;
5032   tree op, tgt;
5033   gimple ass;
5034   gimple_stmt_iterator bsi;
5036   /* An important special case -- if we are asked to express value of
5037      the original iv by itself, just exit; there is no need to
5038      introduce a new computation (that might also need casting the
5039      variable to unsigned and back).  */
5040   if (cand->pos == IP_ORIGINAL
5041       && cand->incremented_at == use->stmt)
5043       tree step, ctype, utype;
5044       enum tree_code incr_code = PLUS_EXPR, old_code;
5046       gcc_assert (is_gimple_assign (use->stmt));
5047       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5049       step = cand->iv->step;
5050       ctype = TREE_TYPE (step);
5051       utype = TREE_TYPE (cand->var_after);
      /* A negated step means the increment is really a subtraction.  */
5052       if (TREE_CODE (step) == NEGATE_EXPR)
5054 	  incr_code = MINUS_EXPR;
5055 	  step = TREE_OPERAND (step, 0);
5058       /* Check whether we may leave the computation unchanged.
5059 	 This is the case only if it does not rely on other
5060 	 computations in the loop -- otherwise, the computation
5061 	 we rely upon may be removed in remove_unused_ivs,
5062 	 thus leading to ICE.  */
5063       old_code = gimple_assign_rhs_code (use->stmt);
5064       if (old_code == PLUS_EXPR
5065 	  || old_code == MINUS_EXPR
5066 	  || old_code == POINTER_PLUS_EXPR)
5068 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5069 	    op = gimple_assign_rhs2 (use->stmt);
5070 	  else if (old_code != MINUS_EXPR
5071 		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5072 	    op = gimple_assign_rhs1 (use->stmt);
5073 	  else
5074 	    op = NULL_TREE;
5076       else
5077 	op = NULL_TREE;
      /* Keeping the statement as-is is safe only when the other operand
	 is a constant or the step itself.  */
5079       if (op
5080 	  && (TREE_CODE (op) == INTEGER_CST
5081 	      || operand_equal_p (op, step, 0)))
5082 	return;
5084       /* Otherwise, add the necessary computations to express
5085 	 the iv.  */
5086       op = fold_convert (ctype, cand->var_before);
5087       comp = fold_convert (utype,
5088 			   build2 (incr_code, ctype, op,
5089 				   unshare_expr (step)));
5091   else
5093       comp = get_computation (data->current_loop, use, cand);
5094       gcc_assert (comp != NULL_TREE);
     /* Find the value being replaced and where the new computation may be
	inserted: after the labels of the phi's block for a phi, right at
	the statement for an assignment.  */
5097   switch (gimple_code (use->stmt))
5099     case GIMPLE_PHI:
5100       tgt = PHI_RESULT (use->stmt);
5102       /* If we should keep the biv, do not replace it.  */
5103       if (name_info (data, tgt)->preserve_biv)
5104 	return;
5106       bsi = gsi_after_labels (gimple_bb (use->stmt));
5107       break;
5109     case GIMPLE_ASSIGN:
5110       tgt = gimple_assign_lhs (use->stmt);
5111       bsi = gsi_for_stmt (use->stmt);
5112       break;
5114     default:
5115       gcc_unreachable ();
5118   op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5119 				 true, GSI_SAME_STMT);
5121   if (gimple_code (use->stmt) == GIMPLE_PHI)
     /* Replace the phi by an assignment of the computed value and remove
	the phi itself; FALSE keeps the defined ssa name alive.  */
5123       ass = gimple_build_assign (tgt, op);
5124       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5125       remove_statement (use->stmt, false);
5127   else
5129       gimple_assign_set_rhs_from_tree (&bsi, op);
5130       use->stmt = gsi_stmt (bsi);
5134 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5135    for_each_index.  */
5137 static bool
5138 idx_remove_ssa_names (tree base, tree *idx,
5139 		      void *data ATTRIBUTE_UNUSED)
5141   tree *op;
5143   if (TREE_CODE (*idx) == SSA_NAME)
5144     *idx = SSA_NAME_VAR (*idx);
     /* For an ARRAY_REF also strip ssa names from the two auxiliary
	operands (2 and 3 -- per GENERIC these hold the lower bound and
	element size; they may be absent, hence the NULL checks).  */
5146   if (TREE_CODE (base) == ARRAY_REF)
5148       op = &TREE_OPERAND (base, 2);
5149       if (*op
5150 	  && TREE_CODE (*op) == SSA_NAME)
5151 	*op = SSA_NAME_VAR (*op);
5152       op = &TREE_OPERAND (base, 3);
5153       if (*op
5154 	  && TREE_CODE (*op) == SSA_NAME)
5155 	*op = SSA_NAME_VAR (*op);
     /* Returning true keeps for_each_index walking remaining indices.  */
5158   return true;
5161 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5163 static tree
5164 unshare_and_remove_ssa_names (tree ref)
5166 ref = unshare_expr (ref);
5167 for_each_index (&ref, idx_remove_ssa_names, NULL);
5169 return ref;
5172 /* Extract the alias analysis info for the memory reference REF.  There are
5173    several ways how this information may be stored and what precisely is
5174    its semantics depending on the type of the reference, but there always is
5175    somewhere hidden one _DECL node that is used to determine the set of
5176    virtual operands for the reference.  The code below deciphers this jungle
5177    and extracts this single useful piece of information.  */
5179 static tree
5180 get_ref_tag (tree ref, tree orig)
5182   tree var = get_base_address (ref);
5183   tree aref = NULL_TREE, tag, sv;
5184   HOST_WIDE_INT offset, size, maxsize;
5186   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5188       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      /* NOTE(review): this tests the parameter REF, which is invariant
	 in the loop, so whenever REF is non-NULL the loop exits after
	 the first iteration; AREF is also never read after the loop.
	 Possibly `if (aref)' was intended -- confirm against this
	 function's history before changing.  */
5189       if (ref)
5190 	break;
5193   if (!var)
5194     return NULL_TREE;
5196   if (TREE_CODE (var) == INDIRECT_REF)
5198       /* If the base is a dereference of a pointer, first check its name memory
5199 	 tag.  If it does not have one, use its symbol memory tag.  */
5200       var = TREE_OPERAND (var, 0);
5201       if (TREE_CODE (var) != SSA_NAME)
5202 	return NULL_TREE;
5204       if (SSA_NAME_PTR_INFO (var))
5206 	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5207 	  if (tag)
5208 	    return tag;
5211       var = SSA_NAME_VAR (var);
5212       tag = symbol_mem_tag (var);
5213       gcc_assert (tag != NULL_TREE);
5214       return tag;
5216   else
5218       if (!DECL_P (var))
5219 	return NULL_TREE;
5221       tag = symbol_mem_tag (var);
5222       if (tag)
5223 	return tag;
      /* No memory tag recorded; fall back to the base decl itself.  */
5225       return var;
5229 /* Copies the reference information from OLD_REF to NEW_REF.  */
5231 static void
5232 copy_ref_info (tree new_ref, tree old_ref)
5234   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5235     copy_mem_ref_info (new_ref, old_ref);
5236   else
     /* NEW_REF is treated as a TARGET_MEM_REF here (TMR_* accessors):
	record a cleaned-up copy of OLD_REF and its alias tag on it.  */
5238       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5239       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5243 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5245 static void
5246 rewrite_use_address (struct ivopts_data *data,
5247 		     struct iv_use *use, struct iv_cand *cand)
5249   aff_tree aff;
5250   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5251   tree ref;
5252   bool ok;
     /* Express the address as an affine combination of the candidate.  */
5254   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5255   gcc_assert (ok);
5256   unshare_aff_combination (&aff);
     /* Build the new memory reference and splice it in place of the old
	one; copy_ref_info carries over the alias information.  */
5258   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5259   copy_ref_info (ref, *use->op_p);
5260   *use->op_p = ref;
5263 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5264    candidate CAND.  */
5266 static void
5267 rewrite_use_compare (struct ivopts_data *data,
5268 		     struct iv_use *use, struct iv_cand *cand)
5270   tree comp, *var_p, op, bound;
5271   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5272   enum tree_code compare;
5273   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5274   bool ok;
     /* A non-NULL bound means the iv can be eliminated: replace the whole
	condition by a comparison of the candidate against BOUND.  */
5276   bound = cp->value;
5277   if (bound)
5279       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5280       tree var_type = TREE_TYPE (var);
5282       compare = iv_elimination_compare (data, use);
5283       bound = unshare_expr (fold_convert (var_type, bound));
5284       op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
5285 				     true, GSI_SAME_STMT);
5287       gimple_cond_set_lhs (use->stmt, var);
5288       gimple_cond_set_code (use->stmt, compare);
5289       gimple_cond_set_rhs (use->stmt, op);
5290       return;
5293   /* The induction variable elimination failed; just express the original
5294      giv.  */
5295   comp = get_computation (data->current_loop, use, cand);
5296   gcc_assert (comp != NULL_TREE);
5298   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5299   gcc_assert (ok);
     /* Replace the iv operand of the condition in place.  */
5301   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5302 				     true, GSI_SAME_STMT);
5305 /* Rewrites USE using candidate CAND.  */
5307 static void
5308 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
     /* Bracket the modification -- NOTE(review): push/pop_stmt_changes
	presumably update the statement's operand caches once the rewrite
	is done; confirm in their definitions.  */
5310   push_stmt_changes (&use->stmt);
     /* Dispatch on the kind of the use; these are the three use kinds
	collected by find_interesting_uses.  */
5312   switch (use->type)
5314     case USE_NONLINEAR_EXPR:
5315       rewrite_use_nonlinear_expr (data, use, cand);
5316       break;
5318     case USE_ADDRESS:
5319       rewrite_use_address (data, use, cand);
5320       break;
5322     case USE_COMPARE:
5323       rewrite_use_compare (data, use, cand);
5324       break;
5326     default:
5327       gcc_unreachable ();
5330   pop_stmt_changes (&use->stmt);
5333 /* Rewrite the uses using the selected induction variables. */
5335 static void
5336 rewrite_uses (struct ivopts_data *data)
5338 unsigned i;
5339 struct iv_cand *cand;
5340 struct iv_use *use;
5342 for (i = 0; i < n_iv_uses (data); i++)
5344 use = iv_use (data, i);
5345 cand = use->selected;
5346 gcc_assert (cand);
5348 rewrite_use (data, use, cand);
5352 /* Removes the ivs that are not used after rewriting. */
5354 static void
5355 remove_unused_ivs (struct ivopts_data *data)
5357 unsigned j;
5358 bitmap_iterator bi;
5360 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5362 struct version_info *info;
5364 info = ver_info (data, j);
5365 if (info->iv
5366 && !integer_zerop (info->iv->step)
5367 && !info->inv_id
5368 && !info->iv->have_use_for
5369 && !info->preserve_biv)
5370 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5374 /* Frees data allocated by the optimization of a single loop.  */
5376 static void
5377 free_loop_data (struct ivopts_data *data)
5379   unsigned i, j;
5380   bitmap_iterator bi;
5381   tree obj;
     /* Drop the cached number-of-iterations map of the loop.  */
5383   if (data->niters)
5385       pointer_map_destroy (data->niters);
5386       data->niters = NULL;
     /* Reset the per-ssa-name version information of relevant names.  */
5389   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5391       struct version_info *info;
5393       info = ver_info (data, i);
5394       if (info->iv)
5395 	free (info->iv);
5396       info->iv = NULL;
5397       info->has_nonlin_use = false;
5398       info->preserve_biv = false;
5399       info->inv_id = 0;
5401   bitmap_clear (data->relevant);
5402   bitmap_clear (data->important_candidates);
     /* Release the uses together with their cost maps.  */
5404   for (i = 0; i < n_iv_uses (data); i++)
5406       struct iv_use *use = iv_use (data, i);
5408       free (use->iv);
5409       BITMAP_FREE (use->related_cands);
5410       for (j = 0; j < use->n_map_members; j++)
5411 	if (use->cost_map[j].depends_on)
5412 	  BITMAP_FREE (use->cost_map[j].depends_on);
5413       free (use->cost_map);
5414       free (use);
5416   VEC_truncate (iv_use_p, data->iv_uses, 0);
     /* Release the candidates.  */
5418   for (i = 0; i < n_iv_cands (data); i++)
5420       struct iv_cand *cand = iv_cand (data, i);
5422       if (cand->iv)
5423 	free (cand->iv);
5424       if (cand->depends_on)
5425 	BITMAP_FREE (cand->depends_on);
5426       free (cand);
5428   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
     /* Regrow the version_info array if new ssa names appeared.  */
5430   if (data->version_info_size < num_ssa_names)
5432       data->version_info_size = 2 * num_ssa_names;
5433       free (data->version_info);
5434       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5437   data->max_inv_id = 0;
     /* Reset the DECL_RTL of decls recorded in decl_rtl_to_reset (the
	vector is filled elsewhere in this pass).  */
5439   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5440     SET_DECL_RTL (obj, NULL_RTX);
5442   VEC_truncate (tree, decl_rtl_to_reset, 0);
5445 /* Finalizes data structures used by the iv optimization pass.  */
5448 static void
5449 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
     /* free_loop_data both releases per-loop data and writes into
	data->version_info, so it must run before the containers below
	are freed.  */
5451   free_loop_data (data);
5452   free (data->version_info);
5453   BITMAP_FREE (data->relevant);
5454   BITMAP_FREE (data->important_candidates);
5456   VEC_free (tree, heap, decl_rtl_to_reset);
5457   VEC_free (iv_use_p, heap, data->iv_uses);
5458   VEC_free (iv_cand_p, heap, data->iv_candidates);
5461 /* Optimizes the LOOP.  Returns true if anything changed.  */
5463 static bool
5464 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5466   bool changed = false;
5467   struct iv_ca *iv_ca;
5468   edge exit;
     /* free_loop_data must have cleared the niters cache of the
	previously processed loop.  */
5470   gcc_assert (!data->niters);
5471   data->current_loop = loop;
5473   if (dump_file && (dump_flags & TDF_DETAILS))
5475       fprintf (dump_file, "Processing loop %d\n", loop->num);
     /* EXIT is only used for dumping here.  */
5477       exit = single_dom_exit (loop);
5478       if (exit)
5480 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5481 		   exit->src->index, exit->dest->index);
5482 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5483 	  fprintf (dump_file, "\n");
5486       fprintf (dump_file, "\n");
5489   /* For each ssa name determines whether it behaves as an induction variable
5490      in some loop.  */
5491   if (!find_induction_variables (data))
5492     goto finish;
5494   /* Finds interesting uses (item 1).  */
5495   find_interesting_uses (data);
     /* Bail out on loops with too many uses; the cost computation below
	would be too expensive.  */
5496   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5497     goto finish;
5499   /* Finds candidates for the induction variables (item 2).  */
5500   find_iv_candidates (data);
5502   /* Calculates the costs (item 3, part 1).  */
5503   determine_use_iv_costs (data);
5504   determine_iv_costs (data);
5505   determine_set_costs (data);
5507   /* Find the optimal set of induction variables (item 3, part 2).  */
5508   iv_ca = find_optimal_iv_set (data);
5509   if (!iv_ca)
5510     goto finish;
5511   changed = true;
5513   /* Create the new induction variables (item 4, part 1).  */
5514   create_new_ivs (data, iv_ca);
5515   iv_ca_free (&iv_ca);
5517   /* Rewrite the uses (item 4, part 2).  */
5518   rewrite_uses (data);
5520   /* Remove the ivs that are unused after rewriting.  */
5521   remove_unused_ivs (data);
5523   /* We have changed the structure of induction variables; it might happen
5524      that definitions in the scev database refer to some of them that were
5525      eliminated.  */
5526   scev_reset ();
5528 finish:
5529   free_loop_data (data);
5531   return changed;
5534 /* Main entry point. Optimizes induction variables in loops. */
5536 void
5537 tree_ssa_iv_optimize (void)
5539 struct loop *loop;
5540 struct ivopts_data data;
5541 loop_iterator li;
5543 tree_ssa_iv_optimize_init (&data);
5545 /* Optimize the loops starting with the innermost ones. */
5546 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5548 if (dump_file && (dump_flags & TDF_DETAILS))
5549 flow_loop_dump (loop, dump_file, NULL, 1);
5551 tree_ssa_iv_optimize_loop (&data, loop);
5554 tree_ssa_iv_optimize_finalize (&data);