gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
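/* Editor-added illustration (a sketch, not part of the original sources):
   for a hypothetical input loop

       for (i = 0; i < n; i++)
         a[i] = 0;

   the pass may strength-reduce the address computation &a[i] into a
   pointer candidate and eliminate the compare on I in favor of a compare
   on the pointer, producing code equivalent to

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   Whether this rewrite actually wins depends on the target cost model
   consulted in step 3 above.  */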
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
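/* Editor-added illustration with hypothetical numbers: a use that needs one
   extra addition on top of a candidate's value might receive a cost pair
   like {1, 0}, while a use requiring a multi-part address computation might
   receive {4, 2}.  The runtime component comes from the target cost model,
   so only the relative ordering of the values is meaningful.  */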
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
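/* Editor's note: as the PARAM_* names suggest, these three bounds are
   tunable from the command line via --param iv-consider-all-candidates-bound,
   --param iv-max-considered-uses and --param iv-always-prune-cand-set-bound,
   e.g. "gcc -O2 --param iv-max-considered-uses=400 ..." to raise the
   second limit.  */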
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
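/* Editor-added sketch of the dump format produced above, for a biv with
   base 0 and step 1 (the SSA name is hypothetical):

     ssa name i_13
       type unsigned int
       base 0
       step 1
       is a biv
*/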
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
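/* Editor-added illustration: for EXPR == &a[i] the ADDR_EXPR case above
   reduces the reference to the declaration A and returns the address of A
   converted to ptr_type_node; for a plain pointer SSA name p_1 the default
   case returns p_1 converted to ptr_type_node; and for an integer constant
   NULL_TREE is returned, meaning no base object is known.  */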
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it does not show up in the dumps, if the
     caller has not already reset it.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
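/* Editor-added illustration: with TOP = i_1 * 8 and BOT = i_1, the MULT_EXPR
   case yields *MUL = 8; with TOP = 20 and BOT = 4, the INTEGER_CST case
   yields *MUL = 5; with TOP = 20 and BOT = 8, the floor division leaves a
   nonzero remainder, so the function returns false.  */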
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
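/* Editor-added illustration (assuming a 4-byte element type): for
   EXPR == &a[5], strip_offset stores 20 to *OFFSET and returns the address
   with the constant index stripped; for EXPR of the form p_1 + 24 it stores
   24 and returns p_1.  The offsets are accumulated by the recursive walk in
   strip_offset_1 above.  */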
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
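/* Editor-added illustration: generic_type_for (int) is unsigned int;
   generic_type_for (unsigned int) is unsigned int itself; and for a pointer
   type, unsigned_type_for yields an unsigned integer type of the same
   precision.  Candidates are thus stepped in unsigned arithmetic, where
   wrap-around is well defined.  */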
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
              && cand->ainc_use != use))
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
        cand->ainc_use = use;
      else
        cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
2209 /* Returns true if incrementing the induction variable at the end of the LOOP
2210 is allowed.
2212 The purpose is to avoid splitting the latch edge with a biv increment, thus
2213 creating a jump, possibly confusing other optimization passes and leaving
2214 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2215 is not available (so we do not have a better alternative), or if the latch
2216 edge is already nonempty. */
2218 static bool
2219 allow_ip_end_pos_p (struct loop *loop)
2221 if (!ip_normal_pos (loop))
2222 return true;
2224 if (!empty_block_p (ip_end_pos (loop)))
2225 return true;
2227 return false;
2230 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2231 The important field is set to IMPORTANT. */
2233 static void
2234 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2235 bool important, struct iv_use *use)
2237 basic_block use_bb = gimple_bb (use->stmt);
2238 enum machine_mode mem_mode;
2239 unsigned HOST_WIDE_INT cstepi;
2241 /* If we insert the increment in any position other than the standard
2242 ones, we must ensure that it is incremented once per iteration.
2243 It must not be in an inner nested loop, or on one side of an if
2244 statement. */
2245 if (use_bb->loop_father != data->current_loop
2246 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2247 || stmt_could_throw_p (use->stmt)
2248 || !cst_and_fits_in_hwi (step))
2249 return;
2251 cstepi = int_cst_value (step);
2253 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2254 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2255 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2257 enum tree_code code = MINUS_EXPR;
2258 tree new_base;
2259 tree new_step = step;
2261 if (POINTER_TYPE_P (TREE_TYPE (base)))
2263 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2264 code = POINTER_PLUS_EXPR;
2266 else
2267 new_step = fold_convert (TREE_TYPE (base), new_step);
2268 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2269 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2270 use->stmt);
2272 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2273 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2275 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2276 use->stmt);
2280 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2281 and the position to POS. If USE is not NULL, the candidate is set as related to
2282 it. The candidate computation is scheduled on all available positions. */
2284 static void
2285 add_candidate (struct ivopts_data *data,
2286 tree base, tree step, bool important, struct iv_use *use)
2288 if (ip_normal_pos (data->current_loop))
2289 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2290 if (ip_end_pos (data->current_loop)
2291 && allow_ip_end_pos_p (data->current_loop))
2292 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2294 if (use != NULL && use->type == USE_ADDRESS)
2295 add_autoinc_candidates (data, base, step, important, use);
2298 /* Add a standard "0 + 1 * iteration" iv candidate for a
2299 type with SIZE bits. */
2301 static void
2302 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2303 unsigned int size)
2305 tree type = lang_hooks.types.type_for_size (size, true);
2306 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2307 true, NULL);
2310 /* Adds standard iv candidates. */
2312 static void
2313 add_standard_iv_candidates (struct ivopts_data *data)
2315 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2317 /* The same for a double-integer type if it is still fast enough. */
2318 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2319 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2323 /* Adds candidates based on the old induction variable IV. */
2325 static void
2326 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2328 gimple phi;
2329 tree def;
2330 struct iv_cand *cand;
2332 add_candidate (data, iv->base, iv->step, true, NULL);
2334 /* The same, but with initial value zero. */
2335 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2336 add_candidate (data, size_int (0), iv->step, true, NULL);
2337 else
2338 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2339 iv->step, true, NULL);
2341 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2342 if (gimple_code (phi) == GIMPLE_PHI)
2344 /* Additionally record the possibility of leaving the original iv
2345 untouched. */
2346 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2347 cand = add_candidate_1 (data,
2348 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2349 SSA_NAME_DEF_STMT (def));
2350 cand->var_before = iv->ssa_name;
2351 cand->var_after = def;
2355 /* Adds candidates based on the old induction variables. */
2357 static void
2358 add_old_ivs_candidates (struct ivopts_data *data)
2360 unsigned i;
2361 struct iv *iv;
2362 bitmap_iterator bi;
2364 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2366 iv = ver_info (data, i)->iv;
2367 if (iv && iv->biv_p && !integer_zerop (iv->step))
2368 add_old_iv_candidates (data, iv);
2372 /* Adds candidates based on the value of the induction variable IV and USE. */
2374 static void
2375 add_iv_value_candidates (struct ivopts_data *data,
2376 struct iv *iv, struct iv_use *use)
2378 unsigned HOST_WIDE_INT offset;
2379 tree base;
2380 tree basetype;
2382 add_candidate (data, iv->base, iv->step, false, use);
2384 /* The same, but with initial value zero. Make such a variable important,
2385 since it is generic enough that many uses may be based
2386 on it. */
2387 basetype = TREE_TYPE (iv->base);
2388 if (POINTER_TYPE_P (basetype))
2389 basetype = sizetype;
2390 add_candidate (data, build_int_cst (basetype, 0),
2391 iv->step, true, use);
2393 /* Third, try removing the constant offset. Make sure to even
2394 add a candidate for &a[0] vs. (T *)&a. */
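/* Illustrative sketch (not from the original source): for an iv whose
   base is &a[4] over 4-byte elements, strip_offset would return base
   &a with offset 16, so besides &a[4] + step * i we also get the
   candidate &a + step * i, which other uses of the array can share.  */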
2395 base = strip_offset (iv->base, &offset);
2396 if (offset
2397 || base != iv->base)
2398 add_candidate (data, base, iv->step, false, use);
2401 /* Adds candidates based on the uses. */
2403 static void
2404 add_derived_ivs_candidates (struct ivopts_data *data)
2406 unsigned i;
2408 for (i = 0; i < n_iv_uses (data); i++)
2410 struct iv_use *use = iv_use (data, i);
2412 if (!use)
2413 continue;
2415 switch (use->type)
2417 case USE_NONLINEAR_EXPR:
2418 case USE_COMPARE:
2419 case USE_ADDRESS:
2420 /* Just add the ivs based on the value of the iv used here. */
2421 add_iv_value_candidates (data, use->iv, use);
2422 break;
2424 default:
2425 gcc_unreachable ();
2430 /* Record important candidates and add them to related_cands bitmaps
2431 if needed. */
2433 static void
2434 record_important_candidates (struct ivopts_data *data)
2436 unsigned i;
2437 struct iv_use *use;
2439 for (i = 0; i < n_iv_cands (data); i++)
2441 struct iv_cand *cand = iv_cand (data, i);
2443 if (cand->important)
2444 bitmap_set_bit (data->important_candidates, i);
2447 data->consider_all_candidates = (n_iv_cands (data)
2448 <= CONSIDER_ALL_CANDIDATES_BOUND);
2450 if (data->consider_all_candidates)
2452 /* We will not need "related_cands" bitmaps in this case,
2453 so release them to decrease peak memory consumption. */
2454 for (i = 0; i < n_iv_uses (data); i++)
2456 use = iv_use (data, i);
2457 BITMAP_FREE (use->related_cands);
2460 else
2462 /* Add important candidates to the related_cands bitmaps. */
2463 for (i = 0; i < n_iv_uses (data); i++)
2464 bitmap_ior_into (iv_use (data, i)->related_cands,
2465 data->important_candidates);
2469 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2470 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2471 we allocate a simple list for every use. */
2473 static void
2474 alloc_use_cost_map (struct ivopts_data *data)
2476 unsigned i, size, s, j;
2478 for (i = 0; i < n_iv_uses (data); i++)
2480 struct iv_use *use = iv_use (data, i);
2481 bitmap_iterator bi;
2483 if (data->consider_all_candidates)
2484 size = n_iv_cands (data);
2485 else
2487 s = 0;
2488 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2490 s++;
2493 /* Round up to a power of two, so that computing the remainder modulo it is fast. */
2494 for (size = 1; size < s; size <<= 1)
2495 continue;
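/* Worked example: with s == 5 related candidates, the loop above
   yields size == 8, so the cost map gets 8 slots and the masking
   "id & (size - 1)" used by set_use_iv_cost/get_use_iv_cost is an
   exact modulo.  */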
2498 use->n_map_members = size;
2499 use->cost_map = XCNEWVEC (struct cost_pair, size);
2503 /* Returns a description of the computation cost of an expression whose
2504 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2506 static comp_cost
2507 new_cost (unsigned runtime, unsigned complexity)
2509 comp_cost cost;
2511 cost.cost = runtime;
2512 cost.complexity = complexity;
2514 return cost;
2517 /* Adds costs COST1 and COST2. */
2519 static comp_cost
2520 add_costs (comp_cost cost1, comp_cost cost2)
2522 cost1.cost += cost2.cost;
2523 cost1.complexity += cost2.complexity;
2525 return cost1;
2527 /* Subtracts cost COST2 from COST1. */
2529 static comp_cost
2530 sub_costs (comp_cost cost1, comp_cost cost2)
2532 cost1.cost -= cost2.cost;
2533 cost1.complexity -= cost2.complexity;
2535 return cost1;
2538 /* Returns a negative number if COST1 < COST2, a positive number if
2539 COST1 > COST2, and 0 if COST1 = COST2. */
2541 static int
2542 compare_costs (comp_cost cost1, comp_cost cost2)
2544 if (cost1.cost == cost2.cost)
2545 return cost1.complexity - cost2.complexity;
2547 return cost1.cost - cost2.cost;
2550 /* Returns true if COST is infinite. */
2552 static bool
2553 infinite_cost_p (comp_cost cost)
2555 return cost.cost == INFTY;
2558 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2559 on invariants DEPENDS_ON and that the value used in expressing it
2560 is VALUE. */
2562 static void
2563 set_use_iv_cost (struct ivopts_data *data,
2564 struct iv_use *use, struct iv_cand *cand,
2565 comp_cost cost, bitmap depends_on, tree value)
2567 unsigned i, s;
2569 if (infinite_cost_p (cost))
2571 BITMAP_FREE (depends_on);
2572 return;
2575 if (data->consider_all_candidates)
2577 use->cost_map[cand->id].cand = cand;
2578 use->cost_map[cand->id].cost = cost;
2579 use->cost_map[cand->id].depends_on = depends_on;
2580 use->cost_map[cand->id].value = value;
2581 return;
2584 /* n_map_members is a power of two, so this computes modulo. */
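/* E.g., assuming n_map_members == 8 and cand->id == 13, the start
   slot is 13 & 7 == 5; the probe then scans slots 5, 6, 7 and wraps
   around to 0 .. 4 -- simple open addressing with linear probing.  */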
2585 s = cand->id & (use->n_map_members - 1);
2586 for (i = s; i < use->n_map_members; i++)
2587 if (!use->cost_map[i].cand)
2588 goto found;
2589 for (i = 0; i < s; i++)
2590 if (!use->cost_map[i].cand)
2591 goto found;
2593 gcc_unreachable ();
2595 found:
2596 use->cost_map[i].cand = cand;
2597 use->cost_map[i].cost = cost;
2598 use->cost_map[i].depends_on = depends_on;
2599 use->cost_map[i].value = value;
2602 /* Gets cost of (USE, CANDIDATE) pair. */
2604 static struct cost_pair *
2605 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2606 struct iv_cand *cand)
2608 unsigned i, s;
2609 struct cost_pair *ret;
2611 if (!cand)
2612 return NULL;
2614 if (data->consider_all_candidates)
2616 ret = use->cost_map + cand->id;
2617 if (!ret->cand)
2618 return NULL;
2620 return ret;
2623 /* n_map_members is a power of two, so this computes modulo. */
2624 s = cand->id & (use->n_map_members - 1);
2625 for (i = s; i < use->n_map_members; i++)
2626 if (use->cost_map[i].cand == cand)
2627 return use->cost_map + i;
2629 for (i = 0; i < s; i++)
2630 if (use->cost_map[i].cand == cand)
2631 return use->cost_map + i;
2633 return NULL;
2636 /* Returns an estimate of the cost of computing SEQ. */
2638 static unsigned
2639 seq_cost (rtx seq, bool speed)
2641 unsigned cost = 0;
2642 rtx set;
2644 for (; seq; seq = NEXT_INSN (seq))
2646 set = single_set (seq);
2647 if (set)
2648 cost += rtx_cost (set, SET, speed);
2649 else
2650 cost++;
2653 return cost;
2656 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2657 static rtx
2658 produce_memory_decl_rtl (tree obj, int *regno)
2660 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2661 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2662 rtx x;
2664 gcc_assert (obj);
2665 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2667 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2668 x = gen_rtx_SYMBOL_REF (address_mode, name);
2669 SET_SYMBOL_REF_DECL (x, obj);
2670 x = gen_rtx_MEM (DECL_MODE (obj), x);
2671 set_mem_addr_space (x, as);
2672 targetm.encode_section_info (obj, x, true);
2674 else
2676 x = gen_raw_REG (address_mode, (*regno)++);
2677 x = gen_rtx_MEM (DECL_MODE (obj), x);
2678 set_mem_addr_space (x, as);
2681 return x;
2684 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2685 walk_tree. DATA contains the actual fake register number. */
2687 static tree
2688 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2690 tree obj = NULL_TREE;
2691 rtx x = NULL_RTX;
2692 int *regno = (int *) data;
2694 switch (TREE_CODE (*expr_p))
2696 case ADDR_EXPR:
2697 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2698 handled_component_p (*expr_p);
2699 expr_p = &TREE_OPERAND (*expr_p, 0))
2700 continue;
2701 obj = *expr_p;
2702 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2703 x = produce_memory_decl_rtl (obj, regno);
2704 break;
2706 case SSA_NAME:
2707 *ws = 0;
2708 obj = SSA_NAME_VAR (*expr_p);
2709 if (!DECL_RTL_SET_P (obj))
2710 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2711 break;
2713 case VAR_DECL:
2714 case PARM_DECL:
2715 case RESULT_DECL:
2716 *ws = 0;
2717 obj = *expr_p;
2719 if (DECL_RTL_SET_P (obj))
2720 break;
2722 if (DECL_MODE (obj) == BLKmode)
2723 x = produce_memory_decl_rtl (obj, regno);
2724 else
2725 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2727 break;
2729 default:
2730 break;
2733 if (x)
2735 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2736 SET_DECL_RTL (obj, x);
2739 return NULL_TREE;
2742 /* Determines cost of the computation of EXPR. */
2744 static unsigned
2745 computation_cost (tree expr, bool speed)
2747 rtx seq, rslt;
2748 tree type = TREE_TYPE (expr);
2749 unsigned cost;
2750 /* Avoid using hard regs in ways which may be unsupported. */
2751 int regno = LAST_VIRTUAL_REGISTER + 1;
2752 struct cgraph_node *node = cgraph_node (current_function_decl);
2753 enum node_frequency real_frequency = node->frequency;
2755 node->frequency = NODE_FREQUENCY_NORMAL;
2756 crtl->maybe_hot_insn_p = speed;
2757 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2758 start_sequence ();
2759 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2760 seq = get_insns ();
2761 end_sequence ();
2762 default_rtl_profile ();
2763 node->frequency = real_frequency;
2765 cost = seq_cost (seq, speed);
2766 if (MEM_P (rslt))
2767 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2768 TYPE_ADDR_SPACE (type), speed);
2770 return cost;
2773 /* Returns variable containing the value of candidate CAND at statement AT. */
2775 static tree
2776 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2778 if (stmt_after_increment (loop, cand, stmt))
2779 return cand->var_after;
2780 else
2781 return cand->var_before;
2784 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2785 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2787 int
2788 tree_int_cst_sign_bit (const_tree t)
2790 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2791 unsigned HOST_WIDE_INT w;
2793 if (bitno < HOST_BITS_PER_WIDE_INT)
2794 w = TREE_INT_CST_LOW (t);
2795 else
2797 w = TREE_INT_CST_HIGH (t);
2798 bitno -= HOST_BITS_PER_WIDE_INT;
2801 return (w >> bitno) & 1;
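/* Illustration: with 64-bit HOST_WIDE_INTs, a type of precision 32
   has bitno == 31, so the sign bit comes from TREE_INT_CST_LOW; a
   precision of 128 gives bitno == 127, so TREE_INT_CST_HIGH is
   shifted by 127 - 64 == 63 instead.  */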
2804 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2805 same precision that is at least as wide as the precision of TYPE, stores
2806 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2807 type of A and B. */
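/* Hypothetical example: if *A is (unsigned int) LA and *B is
   (unsigned int) LB with LA and LB of type long, both conversions
   are stripped, *A = LA, *B = LB, and long is returned, so the
   difference LA - LB can be folded in the wider type.  */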
2809 static tree
2810 determine_common_wider_type (tree *a, tree *b)
2812 tree wider_type = NULL;
2813 tree suba, subb;
2814 tree atype = TREE_TYPE (*a);
2816 if (CONVERT_EXPR_P (*a))
2818 suba = TREE_OPERAND (*a, 0);
2819 wider_type = TREE_TYPE (suba);
2820 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2821 return atype;
2823 else
2824 return atype;
2826 if (CONVERT_EXPR_P (*b))
2828 subb = TREE_OPERAND (*b, 0);
2829 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2830 return atype;
2832 else
2833 return atype;
2835 *a = suba;
2836 *b = subb;
2837 return wider_type;
2840 /* Determines the expression by which USE is expressed from induction variable
2841 CAND at statement AT in LOOP. The expression is stored in a decomposed
2842 form into AFF. Returns false if USE cannot be expressed using CAND. */
2844 static bool
2845 get_computation_aff (struct loop *loop,
2846 struct iv_use *use, struct iv_cand *cand, gimple at,
2847 struct affine_tree_combination *aff)
2849 tree ubase = use->iv->base;
2850 tree ustep = use->iv->step;
2851 tree cbase = cand->iv->base;
2852 tree cstep = cand->iv->step, cstep_common;
2853 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2854 tree common_type, var;
2855 tree uutype;
2856 aff_tree cbase_aff, var_aff;
2857 double_int rat;
2859 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2861 /* We do not have enough precision to express the values of USE. */
2862 return false;
2865 var = var_at_stmt (loop, cand, at);
2866 uutype = unsigned_type_for (utype);
2868 /* If the conversion is not noop, perform it. */
2869 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2871 cstep = fold_convert (uutype, cstep);
2872 cbase = fold_convert (uutype, cbase);
2873 var = fold_convert (uutype, var);
2876 if (!constant_multiple_of (ustep, cstep, &rat))
2877 return false;
2879 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2880 type, we achieve better folding by computing their difference in this
2881 wider type, and cast the result to UUTYPE. We do not need to worry about
2882 overflows, as all the arithmetic will in the end be performed in UUTYPE
2883 anyway. */
2884 common_type = determine_common_wider_type (&ubase, &cbase);
2886 /* use = ubase - ratio * cbase + ratio * var. */
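/* Made-up instance of the formula above: for a use iv {base 16,
   step 4} and a candidate iv {base 0, step 1}, constant_multiple_of
   gives rat == 4 and the combination is 16 - 4 * 0 + 4 * var, i.e.
   the candidate scaled by 4 plus the constant 16.  */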
2887 tree_to_aff_combination (ubase, common_type, aff);
2888 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2889 tree_to_aff_combination (var, uutype, &var_aff);
2891 /* We need to shift the value if we are after the increment. */
2892 if (stmt_after_increment (loop, cand, at))
2894 aff_tree cstep_aff;
2896 if (common_type != uutype)
2897 cstep_common = fold_convert (common_type, cstep);
2898 else
2899 cstep_common = cstep;
2901 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2902 aff_combination_add (&cbase_aff, &cstep_aff);
2905 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2906 aff_combination_add (aff, &cbase_aff);
2907 if (common_type != uutype)
2908 aff_combination_convert (aff, uutype);
2910 aff_combination_scale (&var_aff, rat);
2911 aff_combination_add (aff, &var_aff);
2913 return true;
2916 /* Determines the expression by which USE is expressed from induction variable
2917 CAND at statement AT in LOOP. The computation is unshared. */
2919 static tree
2920 get_computation_at (struct loop *loop,
2921 struct iv_use *use, struct iv_cand *cand, gimple at)
2923 aff_tree aff;
2924 tree type = TREE_TYPE (use->iv->base);
2926 if (!get_computation_aff (loop, use, cand, at, &aff))
2927 return NULL_TREE;
2928 unshare_aff_combination (&aff);
2929 return fold_convert (type, aff_combination_to_tree (&aff));
2932 /* Determines the expression by which USE is expressed from induction variable
2933 CAND in LOOP. The computation is unshared. */
2935 static tree
2936 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2938 return get_computation_at (loop, use, cand, use->stmt);
2941 /* Adjust the cost COST for being in loop setup rather than loop body.
2942 If we're optimizing for space, the loop setup overhead is constant;
2943 if we're optimizing for speed, amortize it over the per-iteration cost. */
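/* For instance, a setup cost of 10 with AVG_LOOP_NITER of 5 is
   charged as 10 / 5 == 2 when optimizing for speed, while for size
   the full 10 is kept -- the setup code is emitted once either way.  */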
2944 static unsigned
2945 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2947 if (cost == INFTY)
2948 return cost;
2949 else if (optimize_loop_for_speed_p (data->current_loop))
2950 return cost / AVG_LOOP_NITER (data->current_loop);
2951 else
2952 return cost;
2955 /* Returns cost of addition in MODE. */
2957 static unsigned
2958 add_cost (enum machine_mode mode, bool speed)
2960 static unsigned costs[NUM_MACHINE_MODES];
2961 rtx seq;
2962 unsigned cost;
2964 if (costs[mode])
2965 return costs[mode];
2967 start_sequence ();
2968 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2969 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2970 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2971 NULL_RTX);
2972 seq = get_insns ();
2973 end_sequence ();
2975 cost = seq_cost (seq, speed);
2976 if (!cost)
2977 cost = 1;
2979 costs[mode] = cost;
2981 if (dump_file && (dump_flags & TDF_DETAILS))
2982 fprintf (dump_file, "Addition in %s costs %d\n",
2983 GET_MODE_NAME (mode), cost);
2984 return cost;
2987 /* Entry in a hashtable of already known costs for multiplication. */
2988 struct mbc_entry
2990 HOST_WIDE_INT cst; /* The constant to multiply by. */
2991 enum machine_mode mode; /* In mode. */
2992 unsigned cost; /* The cost. */
2995 /* Computes the hash value for ENTRY. */
2997 static hashval_t
2998 mbc_entry_hash (const void *entry)
3000 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3002 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3005 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3007 static int
3008 mbc_entry_eq (const void *entry1, const void *entry2)
3010 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3011 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3013 return (e1->mode == e2->mode
3014 && e1->cst == e2->cst);
3017 /* Returns cost of multiplication by constant CST in MODE. */
3019 unsigned
3020 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3022 static htab_t costs;
3023 struct mbc_entry **cached, act;
3024 rtx seq;
3025 unsigned cost;
3027 if (!costs)
3028 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3030 act.mode = mode;
3031 act.cst = cst;
3032 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3033 if (*cached)
3034 return (*cached)->cost;
3036 *cached = XNEW (struct mbc_entry);
3037 (*cached)->mode = mode;
3038 (*cached)->cst = cst;
3040 start_sequence ();
3041 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3042 gen_int_mode (cst, mode), NULL_RTX, 0);
3043 seq = get_insns ();
3044 end_sequence ();
3046 cost = seq_cost (seq, speed);
3048 if (dump_file && (dump_flags & TDF_DETAILS))
3049 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3050 (int) cst, GET_MODE_NAME (mode), cost);
3052 (*cached)->cost = cost;
3054 return cost;
3057 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3058 validity for a memory reference accessing memory of mode MODE in
3059 address space AS. */
3061 DEF_VEC_P (sbitmap);
3062 DEF_VEC_ALLOC_P (sbitmap, heap);
3064 bool
3065 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3066 addr_space_t as)
3068 #define MAX_RATIO 128
3069 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3070 static VEC (sbitmap, heap) *valid_mult_list;
3071 sbitmap valid_mult;
3073 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3074 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3076 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3077 if (!valid_mult)
3079 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3080 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3081 rtx addr;
3082 HOST_WIDE_INT i;
3084 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3085 sbitmap_zero (valid_mult);
3086 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3087 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3089 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3090 if (memory_address_addr_space_p (mode, addr, as))
3091 SET_BIT (valid_mult, i + MAX_RATIO);
3094 if (dump_file && (dump_flags & TDF_DETAILS))
3096 fprintf (dump_file, " allowed multipliers:");
3097 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3098 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3099 fprintf (dump_file, " %d", (int) i);
3100 fprintf (dump_file, "\n");
3101 fprintf (dump_file, "\n");
3104 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3107 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3108 return false;
3110 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3113 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3114 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3115 variable is omitted. Compute the cost for a memory reference that accesses
3116 a memory location of mode MEM_MODE in address space AS.
3118 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3119 size of MEM_MODE / RATIO) is available. To make this determination, we
3120 look at the size of the increment to be made, which is given in CSTEP.
3121 CSTEP may be zero if the step is unknown.
3122 STMT_AFTER_INC is true iff the statement we're looking at is after the
3123 increment of the original biv.
3125 TODO -- there must be some better way. This all is quite crude. */
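/* Sketch of the cache layout: costs[sym_p][var_p][off_p][rat_p]
   holds, per mode and address space, the cost of an address of shape

     symbol (if sym_p) + var (if var_p) + offset (if off_p)
       + ratio * index (if rat_p)

   so e.g. costs[0][1][1][0] is the cost of var + offset.  All 16
   combinations are enumerated by the "for (i = 0; i < 16; i++)" loop
   below, which decodes one flag from each bit of i.  */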
3127 typedef struct
3129 HOST_WIDE_INT min_offset, max_offset;
3130 unsigned costs[2][2][2][2];
3131 } *address_cost_data;
3133 DEF_VEC_P (address_cost_data);
3134 DEF_VEC_ALLOC_P (address_cost_data, heap);
3136 static comp_cost
3137 get_address_cost (bool symbol_present, bool var_present,
3138 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3139 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3140 addr_space_t as, bool speed,
3141 bool stmt_after_inc, bool *may_autoinc)
3143 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3144 static VEC(address_cost_data, heap) *address_cost_data_list;
3145 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3146 address_cost_data data;
3147 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3148 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3149 unsigned cost, acost, complexity;
3150 bool offset_p, ratio_p, autoinc;
3151 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3152 unsigned HOST_WIDE_INT mask;
3153 unsigned bits;
3155 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3156 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3157 data_index + 1);
3159 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3160 if (!data)
3162 HOST_WIDE_INT i;
3163 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3164 HOST_WIDE_INT rat, off;
3165 int old_cse_not_expected;
3166 unsigned sym_p, var_p, off_p, rat_p, add_c;
3167 rtx seq, addr, base;
3168 rtx reg0, reg1;
3170 data = (address_cost_data) xcalloc (1, sizeof (*data));
3172 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3174 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3175 for (i = start; i <= 1 << 20; i <<= 1)
3177 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3178 if (!memory_address_addr_space_p (mem_mode, addr, as))
3179 break;
3181 data->max_offset = i == start ? 0 : i >> 1;
3182 off = data->max_offset;
3184 for (i = start; i <= 1 << 20; i <<= 1)
3186 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3187 if (!memory_address_addr_space_p (mem_mode, addr, as))
3188 break;
3190 data->min_offset = i == start ? 0 : -(i >> 1);
3192 if (dump_file && (dump_flags & TDF_DETAILS))
3194 fprintf (dump_file, "get_address_cost:\n");
3195 fprintf (dump_file, " min offset %s %d\n",
3196 GET_MODE_NAME (mem_mode),
3197 (int) data->min_offset);
3198 fprintf (dump_file, " max offset %s %d\n",
3199 GET_MODE_NAME (mem_mode),
3200 (int) data->max_offset);
3203 rat = 1;
3204 for (i = 2; i <= MAX_RATIO; i++)
3205 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3207 rat = i;
3208 break;
3211 /* Compute the cost of various addressing modes. */
3212 acost = 0;
3213 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3214 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3216 if (HAVE_PRE_DECREMENT)
3218 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3219 has_predec[mem_mode]
3220 = memory_address_addr_space_p (mem_mode, addr, as);
3222 if (HAVE_POST_DECREMENT)
3224 addr = gen_rtx_POST_DEC (address_mode, reg0);
3225 has_postdec[mem_mode]
3226 = memory_address_addr_space_p (mem_mode, addr, as);
3228 if (HAVE_PRE_INCREMENT)
3230 addr = gen_rtx_PRE_INC (address_mode, reg0);
3231 has_preinc[mem_mode]
3232 = memory_address_addr_space_p (mem_mode, addr, as);
3234 if (HAVE_POST_INCREMENT)
3236 addr = gen_rtx_POST_INC (address_mode, reg0);
3237 has_postinc[mem_mode]
3238 = memory_address_addr_space_p (mem_mode, addr, as);
3240 for (i = 0; i < 16; i++)
3242 sym_p = i & 1;
3243 var_p = (i >> 1) & 1;
3244 off_p = (i >> 2) & 1;
3245 rat_p = (i >> 3) & 1;
3247 addr = reg0;
3248 if (rat_p)
3249 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3250 gen_int_mode (rat, address_mode));
3252 if (var_p)
3253 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3255 if (sym_p)
3257 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3258 /* ??? We can run into trouble with some backends by presenting
3259 it with symbols which haven't been properly passed through
3260 targetm.encode_section_info. By setting the local bit, we
3261 enhance the probability of things working. */
3262 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3264 if (off_p)
3265 base = gen_rtx_fmt_e (CONST, address_mode,
3266 gen_rtx_fmt_ee
3267 (PLUS, address_mode, base,
3268 gen_int_mode (off, address_mode)));
3270 else if (off_p)
3271 base = gen_int_mode (off, address_mode);
3272 else
3273 base = NULL_RTX;
3275 if (base)
3276 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3278 start_sequence ();
3279 /* To avoid splitting addressing modes, pretend that no cse will
3280 follow. */
3281 old_cse_not_expected = cse_not_expected;
3282 cse_not_expected = true;
3283 addr = memory_address_addr_space (mem_mode, addr, as);
3284 cse_not_expected = old_cse_not_expected;
3285 seq = get_insns ();
3286 end_sequence ();
3288 acost = seq_cost (seq, speed);
3289 acost += address_cost (addr, mem_mode, as, speed);
3291 if (!acost)
3292 acost = 1;
3293 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3296 /* On some targets, it is quite expensive to load symbol to a register,
3297 which makes addresses that contain symbols look much more expensive.
3298 However, the symbol will have to be loaded in any case before the
3299 loop (and quite likely we have it in register already), so it does not
3300 make much sense to penalize them too heavily. So make some final
3301 tweaks for the SYMBOL_PRESENT modes:
3303 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3304 var is cheaper, use this mode with small penalty.
3305 If VAR_PRESENT is true, try whether the mode with
3306 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3307 if this is the case, use it. */
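/* Numeric illustration: if the sym + cst form costs 8 while
   var + cst costs 4, the first tweak lowers the recorded sym + cst
   cost to 4 + 1 == 5, since the symbol load is hoisted before the
   loop anyway.  */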
3308 add_c = add_cost (address_mode, speed);
3309 for (i = 0; i < 8; i++)
3311 var_p = i & 1;
3312 off_p = (i >> 1) & 1;
3313 rat_p = (i >> 2) & 1;
3315 acost = data->costs[0][1][off_p][rat_p] + 1;
3316 if (var_p)
3317 acost += add_c;
3319 if (acost < data->costs[1][var_p][off_p][rat_p])
3320 data->costs[1][var_p][off_p][rat_p] = acost;
3323 if (dump_file && (dump_flags & TDF_DETAILS))
3325 fprintf (dump_file, "Address costs:\n");
3327 for (i = 0; i < 16; i++)
3329 sym_p = i & 1;
3330 var_p = (i >> 1) & 1;
3331 off_p = (i >> 2) & 1;
3332 rat_p = (i >> 3) & 1;
3334 fprintf (dump_file, " ");
3335 if (sym_p)
3336 fprintf (dump_file, "sym + ");
3337 if (var_p)
3338 fprintf (dump_file, "var + ");
3339 if (off_p)
3340 fprintf (dump_file, "cst + ");
3341 if (rat_p)
3342 fprintf (dump_file, "rat * ");
3344 acost = data->costs[sym_p][var_p][off_p][rat_p];
3345 fprintf (dump_file, "index costs %d\n", acost);
3347 if (has_predec[mem_mode] || has_postdec[mem_mode]
3348 || has_preinc[mem_mode] || has_postinc[mem_mode])
3349 fprintf (dump_file, " May include autoinc/dec\n");
3350 fprintf (dump_file, "\n");
3353 VEC_replace (address_cost_data, address_cost_data_list,
3354 data_index, data);
3357 bits = GET_MODE_BITSIZE (address_mode);
3358 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3359 offset &= mask;
3360 if ((offset >> (bits - 1) & 1))
3361 offset |= ~mask;
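/* Worked example: for a 32-bit address mode on a 64-bit host, mask
   is 0xffffffff; an offset of 0xfffffffc has bit 31 set and is thus
   sign-extended to the HOST_WIDE_INT value -4 before being compared
   with data->min_offset and data->max_offset.  */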
3362 s_offset = offset;
3364 autoinc = false;
3365 msize = GET_MODE_SIZE (mem_mode);
3366 autoinc_offset = offset;
3367 if (stmt_after_inc)
3368 autoinc_offset += ratio * cstep;
3369 if (symbol_present || var_present || ratio != 1)
3370 autoinc = false;
3371 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3372 && msize == cstep)
3373 || (has_postdec[mem_mode] && autoinc_offset == 0
3374 && msize == -cstep)
3375 || (has_preinc[mem_mode] && autoinc_offset == msize
3376 && msize == cstep)
3377 || (has_predec[mem_mode] && autoinc_offset == -msize
3378 && msize == -cstep))
3379 autoinc = true;
3381 cost = 0;
3382 offset_p = (s_offset != 0
3383 && data->min_offset <= s_offset
3384 && s_offset <= data->max_offset);
3385 ratio_p = (ratio != 1
3386 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3388 if (ratio != 1 && !ratio_p)
3389 cost += multiply_by_cost (ratio, address_mode, speed);
3391 if (s_offset && !offset_p && !symbol_present)
3392 cost += add_cost (address_mode, speed);
3394 if (may_autoinc)
3395 *may_autoinc = autoinc;
3396 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3397 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3398 return new_cost (cost + acost, complexity);
3401 /* Estimates cost of forcing expression EXPR into a variable. */
3403 static comp_cost
3404 force_expr_to_var_cost (tree expr, bool speed)
3406 static bool costs_initialized = false;
3407 static unsigned integer_cost [2];
3408 static unsigned symbol_cost [2];
3409 static unsigned address_cost [2];
3410 tree op0, op1;
3411 comp_cost cost0, cost1, cost;
3412 enum machine_mode mode;
3414 if (!costs_initialized)
3416 tree type = build_pointer_type (integer_type_node);
3417 tree var, addr;
3418 rtx x;
3419 int i;
3421 var = create_tmp_var_raw (integer_type_node, "test_var");
3422 TREE_STATIC (var) = 1;
3423 x = produce_memory_decl_rtl (var, NULL);
3424 SET_DECL_RTL (var, x);
3426 addr = build1 (ADDR_EXPR, type, var);
3429 for (i = 0; i < 2; i++)
3431 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3432 2000), i);
3434 symbol_cost[i] = computation_cost (addr, i) + 1;
3436 address_cost[i]
3437 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3438 addr,
3439 build_int_cst (sizetype, 2000)), i) + 1;
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3442 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3443 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3444 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3445 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3446 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3447 fprintf (dump_file, "\n");
3451 costs_initialized = true;
3454 STRIP_NOPS (expr);
3456 if (SSA_VAR_P (expr))
3457 return zero_cost;
3459 if (is_gimple_min_invariant (expr))
3461 if (TREE_CODE (expr) == INTEGER_CST)
3462 return new_cost (integer_cost [speed], 0);
3464 if (TREE_CODE (expr) == ADDR_EXPR)
3466 tree obj = TREE_OPERAND (expr, 0);
3468 if (TREE_CODE (obj) == VAR_DECL
3469 || TREE_CODE (obj) == PARM_DECL
3470 || TREE_CODE (obj) == RESULT_DECL)
3471 return new_cost (symbol_cost [speed], 0);
3474 return new_cost (address_cost [speed], 0);
3477 switch (TREE_CODE (expr))
3479 case POINTER_PLUS_EXPR:
3480 case PLUS_EXPR:
3481 case MINUS_EXPR:
3482 case MULT_EXPR:
3483 op0 = TREE_OPERAND (expr, 0);
3484 op1 = TREE_OPERAND (expr, 1);
3485 STRIP_NOPS (op0);
3486 STRIP_NOPS (op1);
3488 if (is_gimple_val (op0))
3489 cost0 = zero_cost;
3490 else
3491 cost0 = force_expr_to_var_cost (op0, speed);
3493 if (is_gimple_val (op1))
3494 cost1 = zero_cost;
3495 else
3496 cost1 = force_expr_to_var_cost (op1, speed);
3498 break;
3500 case NEGATE_EXPR:
3501 op0 = TREE_OPERAND (expr, 0);
3502 STRIP_NOPS (op0);
3503 op1 = NULL_TREE;
3505 if (is_gimple_val (op0))
3506 cost0 = zero_cost;
3507 else
3508 cost0 = force_expr_to_var_cost (op0, speed);
3510 cost1 = zero_cost;
3511 break;
3513 default:
3514 /* Just an arbitrary value, FIXME. */
3515 return new_cost (target_spill_cost[speed], 0);
3518 mode = TYPE_MODE (TREE_TYPE (expr));
3519 switch (TREE_CODE (expr))
3521 case POINTER_PLUS_EXPR:
3522 case PLUS_EXPR:
3523 case MINUS_EXPR:
3524 case NEGATE_EXPR:
3525 cost = new_cost (add_cost (mode, speed), 0);
3526 break;
3528 case MULT_EXPR:
3529 if (cst_and_fits_in_hwi (op0))
3530 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3531 else if (cst_and_fits_in_hwi (op1))
3532 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3533 else
3534 return new_cost (target_spill_cost [speed], 0);
3535 break;
3537 default:
3538 gcc_unreachable ();
3541 cost = add_costs (cost, cost0);
3542 cost = add_costs (cost, cost1);
3544 /* Bound the cost by target_spill_cost. The parts of complicated
3545 computations often are either loop invariant or at least can
3546 be shared between several iv uses, so letting this grow without
3547 limits would not give reasonable results. */
3548 if (cost.cost > (int) target_spill_cost [speed])
3549 cost.cost = target_spill_cost [speed];
3551 return cost;
3554 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3555 invariants the computation depends on. */
3557 static comp_cost
3558 force_var_cost (struct ivopts_data *data,
3559 tree expr, bitmap *depends_on)
3561 if (depends_on)
3563 fd_ivopts_data = data;
3564 walk_tree (&expr, find_depends, depends_on, NULL);
3567 return force_expr_to_var_cost (expr, data->speed);
3570 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3571 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3572 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3573 invariants the computation depends on. */
3575 static comp_cost
3576 split_address_cost (struct ivopts_data *data,
3577 tree addr, bool *symbol_present, bool *var_present,
3578 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3580 tree core;
3581 HOST_WIDE_INT bitsize;
3582 HOST_WIDE_INT bitpos;
3583 tree toffset;
3584 enum machine_mode mode;
3585 int unsignedp, volatilep;
3587 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3588 &unsignedp, &volatilep, false);
3590 if (toffset != 0
3591 || bitpos % BITS_PER_UNIT != 0
3592 || TREE_CODE (core) != VAR_DECL)
3594 *symbol_present = false;
3595 *var_present = true;
3596 fd_ivopts_data = data;
3597 walk_tree (&addr, find_depends, depends_on, NULL);
3598 return new_cost (target_spill_cost[data->speed], 0);
3601 *offset += bitpos / BITS_PER_UNIT;
3602 if (TREE_STATIC (core)
3603 || DECL_EXTERNAL (core))
3605 *symbol_present = true;
3606 *var_present = false;
3607 return zero_cost;
3610 *symbol_present = false;
3611 *var_present = true;
3612 return zero_cost;
3615 /* Estimates cost of expressing difference of addresses E1 - E2 as
3616 var + symbol + offset. The value of offset is added to OFFSET,
3617 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3618 part is missing. DEPENDS_ON is a set of the invariants the computation
3619 depends on. */
3621 static comp_cost
3622 ptr_difference_cost (struct ivopts_data *data,
3623 tree e1, tree e2, bool *symbol_present, bool *var_present,
3624 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3626 HOST_WIDE_INT diff = 0;
3627 aff_tree aff_e1, aff_e2;
3628 tree type;
3630 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3632 if (ptr_difference_const (e1, e2, &diff))
3634 *offset += diff;
3635 *symbol_present = false;
3636 *var_present = false;
3637 return zero_cost;
3640 if (integer_zerop (e2))
3641 return split_address_cost (data, TREE_OPERAND (e1, 0),
3642 symbol_present, var_present, offset, depends_on);
3644 *symbol_present = false;
3645 *var_present = true;
3647 type = signed_type_for (TREE_TYPE (e1));
3648 tree_to_aff_combination (e1, type, &aff_e1);
3649 tree_to_aff_combination (e2, type, &aff_e2);
3650 aff_combination_scale (&aff_e2, double_int_minus_one);
3651 aff_combination_add (&aff_e1, &aff_e2);
3653 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3656 /* Estimates cost of expressing difference E1 - E2 as
3657 var + symbol + offset. The value of offset is added to OFFSET,
3658 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3659 part is missing. DEPENDS_ON is a set of the invariants the computation
3660 depends on. */
3662 static comp_cost
3663 difference_cost (struct ivopts_data *data,
3664 tree e1, tree e2, bool *symbol_present, bool *var_present,
3665 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3667 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3668 unsigned HOST_WIDE_INT off1, off2;
3669 aff_tree aff_e1, aff_e2;
3670 tree type;
3672 e1 = strip_offset (e1, &off1);
3673 e2 = strip_offset (e2, &off2);
3674 *offset += off1 - off2;
3676 STRIP_NOPS (e1);
3677 STRIP_NOPS (e2);
3679 if (TREE_CODE (e1) == ADDR_EXPR)
3680 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3681 offset, depends_on);
3682 *symbol_present = false;
3684 if (operand_equal_p (e1, e2, 0))
3686 *var_present = false;
3687 return zero_cost;
3690 *var_present = true;
3692 if (integer_zerop (e2))
3693 return force_var_cost (data, e1, depends_on);
3695 if (integer_zerop (e1))
3697 comp_cost cost = force_var_cost (data, e2, depends_on);
3698 cost.cost += multiply_by_cost (-1, mode, data->speed);
3699 return cost;
3702 type = signed_type_for (TREE_TYPE (e1));
3703 tree_to_aff_combination (e1, type, &aff_e1);
3704 tree_to_aff_combination (e2, type, &aff_e2);
3705 aff_combination_scale (&aff_e2, double_int_minus_one);
3706 aff_combination_add (&aff_e1, &aff_e2);
3708 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3711 /* Determines the cost of the computation by which USE is expressed
3712 from induction variable CAND. If ADDRESS_P is true, we just need
3713 to create an address from it, otherwise we want to get it into a
3714 register. A set of invariants we depend on is stored in
3715 DEPENDS_ON. AT is the statement at that the value is computed.
3716 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3717 addressing is likely. */
3719 static comp_cost
3720 get_computation_cost_at (struct ivopts_data *data,
3721 struct iv_use *use, struct iv_cand *cand,
3722 bool address_p, bitmap *depends_on, gimple at,
3723 bool *can_autoinc)
3725 tree ubase = use->iv->base, ustep = use->iv->step;
3726 tree cbase, cstep;
3727 tree utype = TREE_TYPE (ubase), ctype;
3728 unsigned HOST_WIDE_INT cstepi, offset = 0;
3729 HOST_WIDE_INT ratio, aratio;
3730 bool var_present, symbol_present, stmt_is_after_inc;
3731 comp_cost cost;
3732 double_int rat;
3733 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3735 *depends_on = NULL;
3737 /* Only consider real candidates. */
3738 if (!cand->iv)
3739 return infinite_cost;
3741 cbase = cand->iv->base;
3742 cstep = cand->iv->step;
3743 ctype = TREE_TYPE (cbase);
3745 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3747 /* We do not have enough precision to express the values of USE. */
3748 return infinite_cost;
3751 if (address_p)
3753 /* Do not try to express address of an object with computation based
3754 on address of a different object. This may cause problems in rtl
3755 level alias analysis (which does not expect this to happen,
3756 as this is illegal in C), and would be unlikely to be useful
3757 anyway. */
3758 if (use->iv->base_object
3759 && cand->iv->base_object
3760 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3761 return infinite_cost;
3764 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3766 /* TODO -- add direct handling of this case. */
3767 goto fallback;
3770 /* CSTEPI is removed from the offset in case statement is after the
3771 increment. If the step is not constant, we use zero instead.
3772 This is a bit imprecise (there is the extra addition), but
3773 redundancy elimination is likely to transform the code so that
3774 it uses value of the variable before increment anyway,
3775 so this is not all that unrealistic. */
3776 if (cst_and_fits_in_hwi (cstep))
3777 cstepi = int_cst_value (cstep);
3778 else
3779 cstepi = 0;
3781 if (!constant_multiple_of (ustep, cstep, &rat))
3782 return infinite_cost;
3784 if (double_int_fits_in_shwi_p (rat))
3785 ratio = double_int_to_shwi (rat);
3786 else
3787 return infinite_cost;
3789 STRIP_NOPS (cbase);
3790 ctype = TREE_TYPE (cbase);
3792 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3793 or ratio == 1, it is better to handle this like
3795 ubase - ratio * cbase + ratio * var
3797 (this also holds in the case ratio == -1, TODO). */
3799 if (cst_and_fits_in_hwi (cbase))
3801 offset = - ratio * int_cst_value (cbase);
3802 cost = difference_cost (data,
3803 ubase, build_int_cst (utype, 0),
3804 &symbol_present, &var_present, &offset,
3805 depends_on);
3807 else if (ratio == 1)
3809 cost = difference_cost (data,
3810 ubase, cbase,
3811 &symbol_present, &var_present, &offset,
3812 depends_on);
3814 else if (address_p
3815 && !POINTER_TYPE_P (ctype)
3816 && multiplier_allowed_in_address_p
3817 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3818 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3820 cbase
3821 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3822 cost = difference_cost (data,
3823 ubase, cbase,
3824 &symbol_present, &var_present, &offset,
3825 depends_on);
3827 else
3829 cost = force_var_cost (data, cbase, depends_on);
3830 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3831 cost = add_costs (cost,
3832 difference_cost (data,
3833 ubase, build_int_cst (utype, 0),
3834 &symbol_present, &var_present,
3835 &offset, depends_on));
3838 /* If we are after the increment, the value of the candidate is higher by
3839 one iteration. */
3840 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3841 if (stmt_is_after_inc)
3842 offset -= ratio * cstepi;
3844 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3845 (symbol/var1/const parts may be omitted). If we are looking for an
3846 address, find the cost of addressing this. */
3847 if (address_p)
3848 return add_costs (cost,
3849 get_address_cost (symbol_present, var_present,
3850 offset, ratio, cstepi,
3851 TYPE_MODE (TREE_TYPE (utype)),
3852 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3853 speed, stmt_is_after_inc,
3854 can_autoinc));
3856 /* Otherwise estimate the costs for computing the expression. */
3857 if (!symbol_present && !var_present && !offset)
3859 if (ratio != 1)
3860 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3861 return cost;
3864 /* Symbol + offset should be compile-time computable, so consider that they
3865 are added once to the variable, if present. */
3866 if (var_present && (symbol_present || offset))
3867 cost.cost += adjust_setup_cost (data,
3868 add_cost (TYPE_MODE (ctype), speed));
3870 /* Having offset does not affect runtime cost in case it is added to
3871 symbol, but it increases complexity. */
3872 if (offset)
3873 cost.complexity++;
3875 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3877 aratio = ratio > 0 ? ratio : -ratio;
3878 if (aratio != 1)
3879 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3880 return cost;
3882 fallback:
3883 if (can_autoinc)
3884 *can_autoinc = false;
3887 /* Just get the expression, expand it and measure the cost. */
3888 tree comp = get_computation_at (data->current_loop, use, cand, at);
3890 if (!comp)
3891 return infinite_cost;
3893 if (address_p)
3894 comp = build_simple_mem_ref (comp);
3896 return new_cost (computation_cost (comp, speed), 0);
3900 /* Determines the cost of the computation by which USE is expressed
3901 from induction variable CAND. If ADDRESS_P is true, we just need
3902 to create an address from it, otherwise we want to get it into a
3903 register. A set of invariants we depend on is stored in
3904 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3905 autoinc addressing is likely. */
3907 static comp_cost
3908 get_computation_cost (struct ivopts_data *data,
3909 struct iv_use *use, struct iv_cand *cand,
3910 bool address_p, bitmap *depends_on, bool *can_autoinc)
3912 return get_computation_cost_at (data,
3913 use, cand, address_p, depends_on, use->stmt,
3914 can_autoinc);
3917 /* Determines cost of basing replacement of USE on CAND in a generic
3918 expression. */
3920 static bool
3921 determine_use_iv_cost_generic (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand)
3924 bitmap depends_on;
3925 comp_cost cost;
3927 /* The simple case first -- if we need to express the value of the preserved
3928 original biv, the cost is 0. This also prevents us from counting the
3929 cost of increment twice -- once at this use and once in the cost of
3930 the candidate. */
3931 if (cand->pos == IP_ORIGINAL
3932 && cand->incremented_at == use->stmt)
3934 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3935 return true;
3938 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3939 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3941 return !infinite_cost_p (cost);
3944 /* Determines cost of basing replacement of USE on CAND in an address. */
3946 static bool
3947 determine_use_iv_cost_address (struct ivopts_data *data,
3948 struct iv_use *use, struct iv_cand *cand)
3950 bitmap depends_on;
3951 bool can_autoinc;
3952 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3953 &can_autoinc);
3955 if (cand->ainc_use == use)
3957 if (can_autoinc)
3958 cost.cost -= cand->cost_step;
3959 /* If we generated the candidate solely for exploiting autoincrement
3960 opportunities, and it turns out it can't be used, set the cost to
3961 infinity to make sure we ignore it. */
3962 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3963 cost = infinite_cost;
3965 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3967 return !infinite_cost_p (cost);
3970 /* Computes value of candidate CAND at position AT in iteration NITER, and
3971 stores it to VAL. */
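/* In affine form this computes val = base + (niter + after) * step,
   where "after" is 1 if AT executes after the biv increment and 0
   otherwise; e.g. (made-up numbers) base 0, step 4 and niter n give
   4 * n, or 4 * n + 4 after the increment.  */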
3973 static void
3974 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3975 aff_tree *val)
3977 aff_tree step, delta, nit;
3978 struct iv *iv = cand->iv;
3979 tree type = TREE_TYPE (iv->base);
3980 tree steptype = type;
3981 if (POINTER_TYPE_P (type))
3982 steptype = sizetype;
3984 tree_to_aff_combination (iv->step, steptype, &step);
3985 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3986 aff_combination_convert (&nit, steptype);
3987 aff_combination_mult (&nit, &step, &delta);
3988 if (stmt_after_increment (loop, cand, at))
3989 aff_combination_add (&delta, &step);
3991 tree_to_aff_combination (iv->base, type, val);
3992 aff_combination_add (val, &delta);
3995 /* Returns the period of induction variable IV. */
3997 static tree
3998 iv_period (struct iv *iv)
4000 tree step = iv->step, period, type;
4001 tree pow2div;
4003 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4005 /* Period of the iv is gcd (step, type range). Since type range is power
4006 of two, it suffices to determine the maximum power of two that divides
4007 step. */
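/* Illustrative case: for an 8-bit type and step 4 == 2**2,
   num_ending_zeros is 2 and the result below is the low-bits mask
   2**(8 - 2) - 1 == 63: the iv runs through 64 distinct values
   before wrapping.  */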
4008 pow2div = num_ending_zeros (step);
4009 type = unsigned_type_for (TREE_TYPE (step));
4011 period = build_low_bits_mask (type,
4012 (TYPE_PRECISION (type)
4013 - tree_low_cst (pow2div, 1)));
4015 return period;
4018 /* Returns the comparison operator used when eliminating the iv USE. */
4020 static enum tree_code
4021 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4023 struct loop *loop = data->current_loop;
4024 basic_block ex_bb;
4025 edge exit;
4027 ex_bb = gimple_bb (use->stmt);
4028 exit = EDGE_SUCC (ex_bb, 0);
4029 if (flow_bb_inside_loop_p (loop, exit->dest))
4030 exit = EDGE_SUCC (ex_bb, 1);
4032 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4035 /* Check whether it is possible to express the condition in USE by comparison
4036 of candidate CAND. If so, store the value to compare against in BOUND. */
4038 static bool
4039 may_eliminate_iv (struct ivopts_data *data,
4040 struct iv_use *use, struct iv_cand *cand, tree *bound)
4042 basic_block ex_bb;
4043 edge exit;
4044 tree nit, period;
4045 struct loop *loop = data->current_loop;
4046 aff_tree bnd;
4048 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4049 return false;
4051 /* For now, this works only for exits that dominate the loop latch.
4052 TODO: extend to other conditions inside loop body. */
4053 ex_bb = gimple_bb (use->stmt);
4054 if (use->stmt != last_stmt (ex_bb)
4055 || gimple_code (use->stmt) != GIMPLE_COND
4056 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4057 return false;
4059 exit = EDGE_SUCC (ex_bb, 0);
4060 if (flow_bb_inside_loop_p (loop, exit->dest))
4061 exit = EDGE_SUCC (ex_bb, 1);
4062 if (flow_bb_inside_loop_p (loop, exit->dest))
4063 return false;
4065 nit = niter_for_exit (data, exit);
4066 if (!nit)
4067 return false;
4069 /* Determine whether we can use the variable to test the exit condition.
4070 This is the case iff the period of the induction variable is greater
4071 than the number of iterations for which the exit condition is true. */
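/* Hypothetical instance: a candidate whose iv_period is 63 cannot
   eliminate an exit taken after 100 iterations, since its values
   would wrap and alias; an unsigned 32-bit candidate with step 1
   has a period of about 4.29e9, so the exit test can safely become
   a comparison against the value from cand_value_at.  */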
4072 period = iv_period (cand->iv);
4074 /* If the number of iterations is constant, compare against it directly. */
4075 if (TREE_CODE (nit) == INTEGER_CST)
4077 if (!tree_int_cst_lt (nit, period))
4078 return false;
4081 /* If not, and if this is the only possible exit of the loop, see whether
4082 we can get a conservative estimate on the number of iterations of the
4083 entire loop and compare against that instead. */
4084 else if (loop_only_exit_p (loop, exit))
4086 double_int period_value, max_niter;
4087 if (!estimated_loop_iterations (loop, true, &max_niter))
4088 return false;
4089 period_value = tree_to_double_int (period);
4090 if (double_int_ucmp (max_niter, period_value) >= 0)
4091 return false;
4094 /* Otherwise, punt. */
4095 else
4096 return false;
4098 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4100 *bound = aff_combination_to_tree (&bnd);
4101 /* It is unlikely that computing the number of iterations using division
4102 would be more profitable than keeping the original induction variable. */
4103 if (expression_expensive_p (*bound))
4104 return false;
4105 return true;
4108 /* Determines cost of basing replacement of USE on CAND in a condition. */
4110 static bool
4111 determine_use_iv_cost_condition (struct ivopts_data *data,
4112 struct iv_use *use, struct iv_cand *cand)
4114 tree bound = NULL_TREE;
4115 struct iv *cmp_iv;
4116 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4117 comp_cost elim_cost, express_cost, cost;
4118 bool ok;
4119 tree *control_var, *bound_cst;
4121 /* Only consider real candidates. */
4122 if (!cand->iv)
4124 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4125 return false;
4128 /* Try iv elimination. */
4129 if (may_eliminate_iv (data, use, cand, &bound))
4131 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4132 /* The bound is a loop invariant, so it will only be computed
4133 once. */
4134 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4136 else
4137 elim_cost = infinite_cost;
4139 /* Try expressing the original giv. If it is compared with an invariant,
4140 note that we cannot get rid of it. */
4141 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4142 NULL, &cmp_iv);
4143 gcc_assert (ok);
4145 /* When the condition is a comparison of the candidate IV against
4146 zero, prefer this IV.
4148 TODO: The constant that we're subtracting from the cost should
4149 be target-dependent. This information should be added to the
4150 target costs for each backend. */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */
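/* For example, on a target with post-increment addressing, a use *p
   located just before the increment p = p + 4 of the candidate may be
   combinable into a single post-increment access, which makes the
   increment essentially free for that use.  */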
static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so it is weighted only very lightly.  */
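  /* For instance, when optimizing for speed, adjust_setup_cost scales the
     initialization cost down by the expected number of iterations, so a
     loop that is assumed to roll enough pays mostly for the per-iteration
     increment.  */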

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing them;
     the reason is to make debugging simpler.  This is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
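  /* Roughly speaking, while SIZE new registers together with those already
     used still fit into the registers available on the target the penalty
     grows slowly; once they do not, the spill costs estimated by
     estimate_reg_pressure_cost make larger sets quickly unattractive.  */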
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
}

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;
  cost.cost += ivs->cand_cost;
  cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}

/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates a change record that expresses USE by NEW_CP instead of OLD_CP,
   and chains it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the cost pair of the candidate by which USE is expressed
   in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */
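/* A typical usage pattern, as in iv_ca_extend below, is to commit a delta
   tentatively, read off the resulting cost, and then revert it again:

       iv_ca_delta_commit (data, ivs, delta, true);
       cost = iv_ca_cost (ivs);
       iv_ca_delta_commit (data, ivs, delta, false);  */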
static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidate assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n");
}

/* Try changing the candidate in IVS to CAND for each use.  Return the cost
   of the new set, and store the differences in DELTA.  The number of
   induction variables in the new set is stored in N_IVS.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
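  /* For instance, in a loop that walks several arrays with the same index,
     a single counter usually serves all of the a[i], b[i], ... uses better
     than a separate pointer iv per array, even if each pointer iv looks
     slightly cheaper when its own use is considered in isolation.  */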
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
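/* For example, starting from an initial solution {i}, one round of
   try_improve_iv_set may discover that adding a pointer candidate and
   pruning i lowers the total cost; the driver below keeps applying such
   single-candidate changes until no step improves the cost any more.  */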
static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost of a strategy that starts from the original IVs,
     then try again with a strategy that prefers candidates not based
     on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
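  /* For instance, when the use is the statement that increments the
     original biv (i_after = i_before + 1) and CAND is that same biv,
     the statement can be left exactly as it is.  */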
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to an ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Replaces the ssa name at index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  tree new_ptr_base = NULL_TREE;

  if (TREE_CODE (old_ref) == TARGET_MEM_REF
      && TREE_CODE (new_ref) == TARGET_MEM_REF)
    TMR_ORIGINAL (new_ref) = TMR_ORIGINAL (old_ref);
  else if (TREE_CODE (new_ref) == TARGET_MEM_REF)
    TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);

  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);

  if (TREE_CODE (new_ref) == TARGET_MEM_REF)
    new_ptr_base = TMR_BASE (new_ref);
  else if (TREE_CODE (new_ref) == MEM_REF)
    new_ptr_base = TREE_OPERAND (new_ref, 0);

  /* We can transfer points-to information from an old pointer
     or decl base to the new one.  */
  if (new_ptr_base
      && TREE_CODE (new_ptr_base) == SSA_NAME
      && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
      && !SSA_NAME_PTR_INFO (new_ptr_base))
    {
      tree base = get_base_address (old_ref);
      if ((INDIRECT_REF_P (base)
           || TREE_CODE (base) == MEM_REF)
          && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
        duplicate_ssa_name_ptr_info
          (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
      else if (TREE_CODE (base) == VAR_DECL
               || TREE_CODE (base) == PARM_DECL
               || TREE_CODE (base) == RESULT_DECL)
        {
          struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
          pt_solution_set_var (&pi->pt, base);
        }
    }
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
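  /* For example, if the candidate iterates over the addresses &a[0],
     &a[1], ..., the hint lets create_mem_ref use the candidate's value
     as the base address of the memory reference instead of treating it
     as a plain integer offset.  */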
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p),
                        reference_alias_ptr_type (*use->op_p),
                        &aff, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}