1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (with respect to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
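/* As an illustrative sketch of what this pass does (hypothetical code,
   not taken from any testcase): strength reduction rewrites

     for (i = 0; i < n; i++)
       sum += a[i];

   where the address &a[i] must be recomputed as base + i * elt_size
   on every iteration, into the equivalent

     for (p = a; p < a + n; p++)
       sum += *p;

   which replaces the per-iteration multiply-and-add with a single
   pointer increment.  */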
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "tree-flow.h"
74 #include "cfgloop.h"
75 #include "tree-pass.h"
76 #include "ggc.h"
77 #include "insn-config.h"
78 #include "pointer-set.h"
79 #include "hashtab.h"
80 #include "tree-chrec.h"
81 #include "tree-scalar-evolution.h"
83 #include "params.h"
84 #include "langhooks.h"
85 #include "tree-affine.h"
86 #include "target.h"
87 #include "tree-inline.h"
88 #include "tree-ssa-propagate.h"
89 #include "expmed.h"
91 /* FIXME: Expressions are expanded to RTL in this pass to determine the
92 cost of different addressing modes. This should be moved to a TBD
93 interface between the GIMPLE and RTL worlds. */
94 #include "expr.h"
95 #include "recog.h"
97 /* The infinite cost. */
98 #define INFTY 10000000
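/* The number of iterations assumed for a loop when no profile-based
   estimate is available (see avg_loop_niter below).  */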
100 #define AVG_LOOP_NITER(LOOP) 5
102 /* Returns the expected number of loop iterations for LOOP.
103 The average trip count is computed from profile data if it
104 exists. */
106 static inline HOST_WIDE_INT
107 avg_loop_niter (struct loop *loop)
109 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
110 if (niter == -1)
111 return AVG_LOOP_NITER (loop);
113 return niter;
116 /* Representation of the induction variable. */
117 struct iv
119 tree base; /* Initial value of the iv. */
120 tree base_object; /* The memory object that the induction variable points to. */
121 tree step; /* Step of the iv (constant only). */
122 tree ssa_name; /* The ssa name with the value. */
123 bool biv_p; /* Is it a biv? */
124 bool have_use_for; /* Do we already have a use for it? */
125 unsigned use_id; /* The identifier of the use, if have_use_for is set. */
128 /* Per-ssa version information (induction variable descriptions, etc.). */
129 struct version_info
131 tree name; /* The ssa name. */
132 struct iv *iv; /* Induction variable description. */
133 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
134 an expression that is not an induction variable. */
135 bool preserve_biv; /* For the original biv, whether to preserve it. */
136 unsigned inv_id; /* Id of an invariant. */
139 /* Types of uses. */
140 enum use_type
142 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
143 USE_ADDRESS, /* Use in an address. */
144 USE_COMPARE /* Use is a compare. */
147 /* Cost of a computation. */
148 typedef struct
150 int cost; /* The runtime cost. */
151 unsigned complexity; /* The estimate of the complexity of the code for
152 the computation (in no concrete units --
153 the complexity field should be larger for more
154 complex expressions and addressing modes). */
155 } comp_cost;
157 static const comp_cost no_cost = {0, 0};
158 static const comp_cost infinite_cost = {INFTY, INFTY};
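/* A sketch of how these costs are meant to compare (an assumption
   about the comparison logic used later in this file): the runtime
   COST field is compared first and COMPLEXITY only breaks ties, so
   {cost 4, complexity 1} is preferred to {cost 4, complexity 2},
   while {cost 3, complexity 9} is preferred to both.  */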
160 /* The candidate - cost pair. */
161 struct cost_pair
163 struct iv_cand *cand; /* The candidate. */
164 comp_cost cost; /* The cost. */
165 bitmap depends_on; /* The list of invariants that have to be
166 preserved. */
167 tree value; /* For final value elimination, the expression for
168 the final value of the iv. For iv elimination,
169 the new bound to compare with. */
170 enum tree_code comp; /* For iv elimination, the comparison. */
171 int inv_expr_id; /* Loop invariant expression id. */
174 /* Use. */
175 struct iv_use
177 unsigned id; /* The id of the use. */
178 enum use_type type; /* Type of the use. */
179 struct iv *iv; /* The induction variable it is based on. */
180 gimple stmt; /* Statement in which it occurs. */
181 tree *op_p; /* The place where it occurs. */
182 bitmap related_cands; /* The set of "related" iv candidates, plus the common
183 important ones. */
185 unsigned n_map_members; /* Number of candidates in the cost_map list. */
186 struct cost_pair *cost_map;
187 /* The costs w.r.t. the iv candidates. */
189 struct iv_cand *selected;
190 /* The selected candidate. */
193 /* The position where the iv is computed. */
194 enum iv_position
196 IP_NORMAL, /* At the end, just before the exit condition. */
197 IP_END, /* At the end of the latch block. */
198 IP_BEFORE_USE, /* Immediately before a specific use. */
199 IP_AFTER_USE, /* Immediately after a specific use. */
200 IP_ORIGINAL /* The original biv. */
203 /* The induction variable candidate. */
204 struct iv_cand
206 unsigned id; /* The number of the candidate. */
207 bool important; /* Whether this is an "important" candidate, i.e. such
208 that it should be considered by all uses. */
209 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
210 gimple incremented_at;/* For original biv, the statement where it is
211 incremented. */
212 tree var_before; /* The variable used for it before increment. */
213 tree var_after; /* The variable used for it after increment. */
214 struct iv *iv; /* The value of the candidate. NULL for
215 "pseudocandidate" used to indicate the possibility
216 to replace the final value of an iv by direct
217 computation of the value. */
218 unsigned cost; /* Cost of the candidate. */
219 unsigned cost_step; /* Cost of the candidate's increment operation. */
220 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
221 where it is incremented. */
222 bitmap depends_on; /* The list of invariants that are used in step of the
223 biv. */
226 /* Loop invariant expression hashtable entry. */
227 struct iv_inv_expr_ent
229 tree expr;
230 int id;
231 hashval_t hash;
234 /* The data used by the induction variable optimizations. */
236 typedef struct iv_use *iv_use_p;
238 typedef struct iv_cand *iv_cand_p;
240 struct ivopts_data
242 /* The currently optimized loop. */
243 struct loop *current_loop;
245 /* Numbers of iterations for all exits of the current loop. */
246 struct pointer_map_t *niters;
248 /* Number of registers used in it. */
249 unsigned regs_used;
251 /* The size of version_info array allocated. */
252 unsigned version_info_size;
254 /* The array of information for the ssa names. */
255 struct version_info *version_info;
257 /* The hashtable of loop invariant expressions created
258 by ivopt. */
259 htab_t inv_expr_tab;
261 /* Loop invariant expression id. */
262 int inv_expr_id;
264 /* The bitmap of indices in version_info whose value was changed. */
265 bitmap relevant;
267 /* The uses of induction variables. */
268 vec<iv_use_p> iv_uses;
270 /* The candidates. */
271 vec<iv_cand_p> iv_candidates;
273 /* A bitmap of important candidates. */
274 bitmap important_candidates;
276 /* The maximum invariant id. */
277 unsigned max_inv_id;
279 /* Whether to consider just related and important candidates when replacing a
280 use. */
281 bool consider_all_candidates;
283 /* Are we optimizing for speed? */
284 bool speed;
286 /* Whether the loop body includes any function calls. */
287 bool body_includes_call;
289 /* Whether the loop body can only be exited via a single exit. */
290 bool loop_single_exit_p;
293 /* An assignment of iv candidates to uses. */
295 struct iv_ca
297 /* The number of uses covered by the assignment. */
298 unsigned upto;
300 /* Number of uses that cannot be expressed by the candidates in the set. */
301 unsigned bad_uses;
303 /* Candidate assigned to a use, together with the related costs. */
304 struct cost_pair **cand_for_use;
306 /* Number of times each candidate is used. */
307 unsigned *n_cand_uses;
309 /* The candidates used. */
310 bitmap cands;
312 /* The number of candidates in the set. */
313 unsigned n_cands;
315 /* Total number of registers needed. */
316 unsigned n_regs;
318 /* Total cost of expressing uses. */
319 comp_cost cand_use_cost;
321 /* Total cost of candidates. */
322 unsigned cand_cost;
324 /* Number of times each invariant is used. */
325 unsigned *n_invariant_uses;
327 /* The array holding the number of uses of each loop
328 invariant expression created by ivopt. */
329 unsigned *used_inv_expr;
331 /* The number of created loop invariants. */
332 unsigned num_used_inv_expr;
334 /* Total cost of the assignment. */
335 comp_cost cost;
338 /* Difference of two iv candidate assignments. */
340 struct iv_ca_delta
342 /* Changed use. */
343 struct iv_use *use;
345 /* An old assignment (for rollback purposes). */
346 struct cost_pair *old_cp;
348 /* A new assignment. */
349 struct cost_pair *new_cp;
351 /* Next change in the list. */
352 struct iv_ca_delta *next_change;
355 /* Bound on the number of candidates below which all candidates are considered. */
357 #define CONSIDER_ALL_CANDIDATES_BOUND \
358 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
360 /* If there are more iv occurrences, we just give up (it is quite unlikely that
361 optimizing such a loop would help, and it would take ages). */
363 #define MAX_CONSIDERED_USES \
364 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
366 /* If there are at most this number of ivs in the set, always try removing
367 unnecessary ivs from the set. */
369 #define ALWAYS_PRUNE_CAND_SET_BOUND \
370 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
372 /* The list of trees for which the decl_rtl field must be reset is stored
373 here. */
375 static vec<tree> decl_rtl_to_reset;
377 static comp_cost force_expr_to_var_cost (tree, bool);
379 /* Number of uses recorded in DATA. */
381 static inline unsigned
382 n_iv_uses (struct ivopts_data *data)
384 return data->iv_uses.length ();
387 /* Ith use recorded in DATA. */
389 static inline struct iv_use *
390 iv_use (struct ivopts_data *data, unsigned i)
392 return data->iv_uses[i];
395 /* Number of candidates recorded in DATA. */
397 static inline unsigned
398 n_iv_cands (struct ivopts_data *data)
400 return data->iv_candidates.length ();
403 /* Ith candidate recorded in DATA. */
405 static inline struct iv_cand *
406 iv_cand (struct ivopts_data *data, unsigned i)
408 return data->iv_candidates[i];
411 /* The single loop exit if it dominates the latch, NULL otherwise. */
413 edge
414 single_dom_exit (struct loop *loop)
416 edge exit = single_exit (loop);
418 if (!exit)
419 return NULL;
421 if (!just_once_each_iteration_p (loop, exit->src))
422 return NULL;
424 return exit;
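/* For example (an illustrative sketch), in

     for (i = 0; i < n; i++)
       {
         if (early_out)
           break;
         a[i] = 0;
       }

   the loop has two exits, so single_exit and hence single_dom_exit
   return NULL; without the break, the exit at the loop test is the
   single exit and dominates the latch, so it is returned.  */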
427 /* Dumps information about the induction variable IV to FILE. */
429 extern void dump_iv (FILE *, struct iv *);
430 void
431 dump_iv (FILE *file, struct iv *iv)
433 if (iv->ssa_name)
435 fprintf (file, "ssa name ");
436 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
437 fprintf (file, "\n");
440 fprintf (file, " type ");
441 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
442 fprintf (file, "\n");
444 if (iv->step)
446 fprintf (file, " base ");
447 print_generic_expr (file, iv->base, TDF_SLIM);
448 fprintf (file, "\n");
450 fprintf (file, " step ");
451 print_generic_expr (file, iv->step, TDF_SLIM);
452 fprintf (file, "\n");
454 else
456 fprintf (file, " invariant ");
457 print_generic_expr (file, iv->base, TDF_SLIM);
458 fprintf (file, "\n");
461 if (iv->base_object)
463 fprintf (file, " base object ");
464 print_generic_expr (file, iv->base_object, TDF_SLIM);
465 fprintf (file, "\n");
468 if (iv->biv_p)
469 fprintf (file, " is a biv\n");
472 /* Dumps information about the USE to FILE. */
474 extern void dump_use (FILE *, struct iv_use *);
475 void
476 dump_use (FILE *file, struct iv_use *use)
478 fprintf (file, "use %d\n", use->id);
480 switch (use->type)
482 case USE_NONLINEAR_EXPR:
483 fprintf (file, " generic\n");
484 break;
486 case USE_ADDRESS:
487 fprintf (file, " address\n");
488 break;
490 case USE_COMPARE:
491 fprintf (file, " compare\n");
492 break;
494 default:
495 gcc_unreachable ();
498 fprintf (file, " in statement ");
499 print_gimple_stmt (file, use->stmt, 0, 0);
500 fprintf (file, "\n");
502 fprintf (file, " at position ");
503 if (use->op_p)
504 print_generic_expr (file, *use->op_p, TDF_SLIM);
505 fprintf (file, "\n");
507 dump_iv (file, use->iv);
509 if (use->related_cands)
511 fprintf (file, " related candidates ");
512 dump_bitmap (file, use->related_cands);
516 /* Dumps information about the uses to FILE. */
518 extern void dump_uses (FILE *, struct ivopts_data *);
519 void
520 dump_uses (FILE *file, struct ivopts_data *data)
522 unsigned i;
523 struct iv_use *use;
525 for (i = 0; i < n_iv_uses (data); i++)
527 use = iv_use (data, i);
529 dump_use (file, use);
530 fprintf (file, "\n");
534 /* Dumps information about induction variable candidate CAND to FILE. */
536 extern void dump_cand (FILE *, struct iv_cand *);
537 void
538 dump_cand (FILE *file, struct iv_cand *cand)
540 struct iv *iv = cand->iv;
542 fprintf (file, "candidate %d%s\n",
543 cand->id, cand->important ? " (important)" : "");
545 if (cand->depends_on)
547 fprintf (file, " depends on ");
548 dump_bitmap (file, cand->depends_on);
551 if (!iv)
553 fprintf (file, " final value replacement\n");
554 return;
557 if (cand->var_before)
559 fprintf (file, " var_before ");
560 print_generic_expr (file, cand->var_before, TDF_SLIM);
561 fprintf (file, "\n");
563 if (cand->var_after)
565 fprintf (file, " var_after ");
566 print_generic_expr (file, cand->var_after, TDF_SLIM);
567 fprintf (file, "\n");
570 switch (cand->pos)
572 case IP_NORMAL:
573 fprintf (file, " incremented before exit test\n");
574 break;
576 case IP_BEFORE_USE:
577 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
578 break;
580 case IP_AFTER_USE:
581 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
582 break;
584 case IP_END:
585 fprintf (file, " incremented at end\n");
586 break;
588 case IP_ORIGINAL:
589 fprintf (file, " original biv\n");
590 break;
593 dump_iv (file, iv);
596 /* Returns the info for ssa version VER. */
598 static inline struct version_info *
599 ver_info (struct ivopts_data *data, unsigned ver)
601 return data->version_info + ver;
604 /* Returns the info for ssa name NAME. */
606 static inline struct version_info *
607 name_info (struct ivopts_data *data, tree name)
609 return ver_info (data, SSA_NAME_VERSION (name));
612 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
613 emitted in LOOP. */
615 static bool
616 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
618 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
620 gcc_assert (bb);
622 if (sbb == loop->latch)
623 return true;
625 if (sbb != bb)
626 return false;
628 return stmt == last_stmt (bb);
631 /* Returns true if STMT is after the place where the original induction
632 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
633 if the positions are identical. */
635 static bool
636 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
638 basic_block cand_bb = gimple_bb (cand->incremented_at);
639 basic_block stmt_bb = gimple_bb (stmt);
641 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
642 return false;
644 if (stmt_bb != cand_bb)
645 return true;
647 if (true_if_equal
648 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
649 return true;
650 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
653 /* Returns true if STMT is after the place where the induction variable
654 CAND is incremented in LOOP. */
656 static bool
657 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
659 switch (cand->pos)
661 case IP_END:
662 return false;
664 case IP_NORMAL:
665 return stmt_after_ip_normal_pos (loop, stmt);
667 case IP_ORIGINAL:
668 case IP_AFTER_USE:
669 return stmt_after_inc_pos (cand, stmt, false);
671 case IP_BEFORE_USE:
672 return stmt_after_inc_pos (cand, stmt, true);
674 default:
675 gcc_unreachable ();
679 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
681 static bool
682 abnormal_ssa_name_p (tree exp)
684 if (!exp)
685 return false;
687 if (TREE_CODE (exp) != SSA_NAME)
688 return false;
690 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
693 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
694 abnormal phi node. Callback for for_each_index. */
696 static bool
697 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
698 void *data ATTRIBUTE_UNUSED)
700 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
702 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
703 return false;
704 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
705 return false;
708 return !abnormal_ssa_name_p (*index);
711 /* Returns true if EXPR contains a ssa name that occurs in an
712 abnormal phi node. */
714 bool
715 contains_abnormal_ssa_name_p (tree expr)
717 enum tree_code code;
718 enum tree_code_class codeclass;
720 if (!expr)
721 return false;
723 code = TREE_CODE (expr);
724 codeclass = TREE_CODE_CLASS (code);
726 if (code == SSA_NAME)
727 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
729 if (code == INTEGER_CST
730 || is_gimple_min_invariant (expr))
731 return false;
733 if (code == ADDR_EXPR)
734 return !for_each_index (&TREE_OPERAND (expr, 0),
735 idx_contains_abnormal_ssa_name_p,
736 NULL);
738 if (code == COND_EXPR)
739 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
740 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
741 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
743 switch (codeclass)
745 case tcc_binary:
746 case tcc_comparison:
747 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
748 return true;
750 /* Fallthru. */
751 case tcc_unary:
752 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
753 return true;
755 break;
757 default:
758 gcc_unreachable ();
761 return false;
764 /* Returns the structure describing number of iterations determined from
765 EXIT of DATA->current_loop, or NULL if something goes wrong. */
767 static struct tree_niter_desc *
768 niter_for_exit (struct ivopts_data *data, edge exit)
770 struct tree_niter_desc *desc;
771 void **slot;
773 if (!data->niters)
775 data->niters = pointer_map_create ();
776 slot = NULL;
778 else
779 slot = pointer_map_contains (data->niters, exit);
781 if (!slot)
783 /* Try to determine the number of iterations. We cannot safely work with
784 ssa names that appear in phi nodes on abnormal edges, as that could
785 create overlapping life ranges for them (PR 27283). */
786 desc = XNEW (struct tree_niter_desc);
787 if (!number_of_iterations_exit (data->current_loop,
788 exit, desc, true)
789 || contains_abnormal_ssa_name_p (desc->niter))
791 XDELETE (desc);
792 desc = NULL;
794 slot = pointer_map_insert (data->niters, exit);
795 *slot = desc;
797 else
798 desc = (struct tree_niter_desc *) *slot;
800 return desc;
803 /* Returns the structure describing number of iterations determined from
804 single dominating exit of DATA->current_loop, or NULL if something
805 goes wrong. */
807 static struct tree_niter_desc *
808 niter_for_single_dom_exit (struct ivopts_data *data)
810 edge exit = single_dom_exit (data->current_loop);
812 if (!exit)
813 return NULL;
815 return niter_for_exit (data, exit);
818 /* Hash table equality function for expressions. */
820 static int
821 htab_inv_expr_eq (const void *ent1, const void *ent2)
823 const struct iv_inv_expr_ent *expr1 =
824 (const struct iv_inv_expr_ent *)ent1;
825 const struct iv_inv_expr_ent *expr2 =
826 (const struct iv_inv_expr_ent *)ent2;
828 return expr1->hash == expr2->hash
829 && operand_equal_p (expr1->expr, expr2->expr, 0);
832 /* Hash function for loop invariant expressions. */
834 static hashval_t
835 htab_inv_expr_hash (const void *ent)
837 const struct iv_inv_expr_ent *expr =
838 (const struct iv_inv_expr_ent *)ent;
839 return expr->hash;
842 /* Initializes data structures used by the iv optimization pass, stored
843 in DATA. */
845 static void
846 tree_ssa_iv_optimize_init (struct ivopts_data *data)
848 data->version_info_size = 2 * num_ssa_names;
849 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
850 data->relevant = BITMAP_ALLOC (NULL);
851 data->important_candidates = BITMAP_ALLOC (NULL);
852 data->max_inv_id = 0;
853 data->niters = NULL;
854 data->iv_uses.create (20);
855 data->iv_candidates.create (20);
856 data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
857 htab_inv_expr_eq, free);
858 data->inv_expr_id = 0;
859 decl_rtl_to_reset.create (20);
862 /* Returns the memory object that EXPR points to. In case we are able to
863 determine that it does not point to any such object, NULL is returned. */
865 static tree
866 determine_base_object (tree expr)
868 enum tree_code code = TREE_CODE (expr);
869 tree base, obj;
871 /* If this is a pointer casted to any type, we need to determine
872 the base object for the pointer; so handle conversions before
873 throwing away non-pointer expressions. */
874 if (CONVERT_EXPR_P (expr))
875 return determine_base_object (TREE_OPERAND (expr, 0));
877 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
878 return NULL_TREE;
880 switch (code)
882 case INTEGER_CST:
883 return NULL_TREE;
885 case ADDR_EXPR:
886 obj = TREE_OPERAND (expr, 0);
887 base = get_base_address (obj);
889 if (!base)
890 return expr;
892 if (TREE_CODE (base) == MEM_REF)
893 return determine_base_object (TREE_OPERAND (base, 0));
895 return fold_convert (ptr_type_node,
896 build_fold_addr_expr (base));
898 case POINTER_PLUS_EXPR:
899 return determine_base_object (TREE_OPERAND (expr, 0));
901 case PLUS_EXPR:
902 case MINUS_EXPR:
903 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
904 gcc_unreachable ();
906 default:
907 return fold_convert (ptr_type_node, expr);
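/* Illustrative examples (hypothetical expressions): for &x[i] the
   base object is &x; for an ssa name P of pointer type, P itself is
   returned; for a non-pointer expression such as i + 1, the result
   is NULL_TREE.  */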
911 /* Allocates an induction variable with given initial value BASE and step STEP
912 for loop LOOP. */
914 static struct iv *
915 alloc_iv (tree base, tree step)
917 struct iv *iv = XCNEW (struct iv);
918 gcc_assert (step != NULL_TREE);
920 iv->base = base;
921 iv->base_object = determine_base_object (base);
922 iv->step = step;
923 iv->biv_p = false;
924 iv->have_use_for = false;
925 iv->use_id = 0;
926 iv->ssa_name = NULL_TREE;
928 return iv;
931 /* Sets STEP and BASE for induction variable IV. */
933 static void
934 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
936 struct version_info *info = name_info (data, iv);
938 gcc_assert (!info->iv);
940 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
941 info->iv = alloc_iv (base, step);
942 info->iv->ssa_name = iv;
945 /* Finds induction variable declaration for VAR. */
947 static struct iv *
948 get_iv (struct ivopts_data *data, tree var)
950 basic_block bb;
951 tree type = TREE_TYPE (var);
953 if (!POINTER_TYPE_P (type)
954 && !INTEGRAL_TYPE_P (type))
955 return NULL;
957 if (!name_info (data, var)->iv)
959 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
961 if (!bb
962 || !flow_bb_inside_loop_p (data->current_loop, bb))
963 set_iv (data, var, var, build_int_cst (type, 0));
966 return name_info (data, var)->iv;
969 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
970 not define a simple affine biv with nonzero step. */
972 static tree
973 determine_biv_step (gimple phi)
975 struct loop *loop = gimple_bb (phi)->loop_father;
976 tree name = PHI_RESULT (phi);
977 affine_iv iv;
979 if (virtual_operand_p (name))
980 return NULL_TREE;
982 if (!simple_iv (loop, loop, name, &iv, true))
983 return NULL_TREE;
985 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
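/* For instance (a sketch): given the header phi

     i_1 = PHI <0 (preheader), i_2 (latch)>,  with  i_2 = i_1 + 1

   simple_iv describes i_1 as {base 0, step 1} and the step 1 is
   returned; for a name whose value is invariant in the loop the step
   is zero, so NULL_TREE is returned.  */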
988 /* Finds basic ivs. */
990 static bool
991 find_bivs (struct ivopts_data *data)
993 gimple phi;
994 tree step, type, base;
995 bool found = false;
996 struct loop *loop = data->current_loop;
997 gimple_stmt_iterator psi;
999 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1001 phi = gsi_stmt (psi);
1003 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1004 continue;
1006 step = determine_biv_step (phi);
1007 if (!step)
1008 continue;
1010 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1011 base = expand_simple_operations (base);
1012 if (contains_abnormal_ssa_name_p (base)
1013 || contains_abnormal_ssa_name_p (step))
1014 continue;
1016 type = TREE_TYPE (PHI_RESULT (phi));
1017 base = fold_convert (type, base);
1018 if (step)
1020 if (POINTER_TYPE_P (type))
1021 step = convert_to_ptrofftype (step);
1022 else
1023 step = fold_convert (type, step);
1026 set_iv (data, PHI_RESULT (phi), base, step);
1027 found = true;
1030 return found;
1033 /* Marks basic ivs. */
1035 static void
1036 mark_bivs (struct ivopts_data *data)
1038 gimple phi;
1039 tree var;
1040 struct iv *iv, *incr_iv;
1041 struct loop *loop = data->current_loop;
1042 basic_block incr_bb;
1043 gimple_stmt_iterator psi;
1045 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1047 phi = gsi_stmt (psi);
1049 iv = get_iv (data, PHI_RESULT (phi));
1050 if (!iv)
1051 continue;
1053 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1054 incr_iv = get_iv (data, var);
1055 if (!incr_iv)
1056 continue;
1058 /* If the increment is in the subloop, ignore it. */
1059 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1060 if (incr_bb->loop_father != data->current_loop
1061 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1062 continue;
1064 iv->biv_p = true;
1065 incr_iv->biv_p = true;
1069 /* Checks whether STMT defines a linear induction variable and stores its
1070 parameters to IV. */
1072 static bool
1073 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1075 tree lhs;
1076 struct loop *loop = data->current_loop;
1078 iv->base = NULL_TREE;
1079 iv->step = NULL_TREE;
1081 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1082 return false;
1084 lhs = gimple_assign_lhs (stmt);
1085 if (TREE_CODE (lhs) != SSA_NAME)
1086 return false;
1088 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1089 return false;
1090 iv->base = expand_simple_operations (iv->base);
1092 if (contains_abnormal_ssa_name_p (iv->base)
1093 || contains_abnormal_ssa_name_p (iv->step))
1094 return false;
1096 /* If STMT could throw, then do not consider STMT as defining a GIV.
1097 While this will suppress optimizations, we can not safely delete this
1098 GIV and associated statements, even if it appears it is not used. */
1099 if (stmt_could_throw_p (stmt))
1100 return false;
1102 return true;
1105 /* Finds general ivs in statement STMT. */
1107 static void
1108 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1110 affine_iv iv;
1112 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1113 return;
1115 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1118 /* Finds general ivs in basic block BB. */
1120 static void
1121 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1123 gimple_stmt_iterator bsi;
1125 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1126 find_givs_in_stmt (data, gsi_stmt (bsi));
1129 /* Finds general ivs. */
1131 static void
1132 find_givs (struct ivopts_data *data)
1134 struct loop *loop = data->current_loop;
1135 basic_block *body = get_loop_body_in_dom_order (loop);
1136 unsigned i;
1138 for (i = 0; i < loop->num_nodes; i++)
1139 find_givs_in_bb (data, body[i]);
1140 free (body);
1143 /* For each ssa name defined in LOOP determines whether it is an induction
1144 variable and if so, its initial value and step. */
1146 static bool
1147 find_induction_variables (struct ivopts_data *data)
1149 unsigned i;
1150 bitmap_iterator bi;
1152 if (!find_bivs (data))
1153 return false;
1155 find_givs (data);
1156 mark_bivs (data);
1158 if (dump_file && (dump_flags & TDF_DETAILS))
1160 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1162 if (niter)
1164 fprintf (dump_file, " number of iterations ");
1165 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1166 if (!integer_zerop (niter->may_be_zero))
1168 fprintf (dump_file, "; zero if ");
1169 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1171 fprintf (dump_file, "\n\n");
1174 fprintf (dump_file, "Induction variables:\n\n");
1176 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1178 if (ver_info (data, i)->iv)
1179 dump_iv (dump_file, ver_info (data, i)->iv);
1183 return true;
1186 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1188 static struct iv_use *
1189 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1190 gimple stmt, enum use_type use_type)
1192 struct iv_use *use = XCNEW (struct iv_use);
1194 use->id = n_iv_uses (data);
1195 use->type = use_type;
1196 use->iv = iv;
1197 use->stmt = stmt;
1198 use->op_p = use_p;
1199 use->related_cands = BITMAP_ALLOC (NULL);
1201 /* Reset the ssa name so that it is not shown in the dumps, if it was
1202 not already reset by the caller. */
1203 iv->ssa_name = NULL_TREE;
1205 if (dump_file && (dump_flags & TDF_DETAILS))
1206 dump_use (dump_file, use);
1208 data->iv_uses.safe_push (use);
1210 return use;
1213 /* Checks whether OP is a loop-level invariant and if so, records it.
1214 NONLINEAR_USE is true if the invariant is used in a way we do not
1215 handle specially. */
1217 static void
1218 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1220 basic_block bb;
1221 struct version_info *info;
1223 if (TREE_CODE (op) != SSA_NAME
1224 || virtual_operand_p (op))
1225 return;
1227 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1228 if (bb
1229 && flow_bb_inside_loop_p (data->current_loop, bb))
1230 return;
1232 info = name_info (data, op);
1233 info->name = op;
1234 info->has_nonlin_use |= nonlinear_use;
1235 if (!info->inv_id)
1236 info->inv_id = ++data->max_inv_id;
1237 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1240 /* Checks whether the use OP is interesting and if so, records it. */
1242 static struct iv_use *
1243 find_interesting_uses_op (struct ivopts_data *data, tree op)
1245 struct iv *iv;
1246 struct iv *civ;
1247 gimple stmt;
1248 struct iv_use *use;
1250 if (TREE_CODE (op) != SSA_NAME)
1251 return NULL;
1253 iv = get_iv (data, op);
1254 if (!iv)
1255 return NULL;
1257 if (iv->have_use_for)
1259 use = iv_use (data, iv->use_id);
1261 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1262 return use;
1265 if (integer_zerop (iv->step))
1267 record_invariant (data, op, true);
1268 return NULL;
1270 iv->have_use_for = true;
1272 civ = XNEW (struct iv);
1273 *civ = *iv;
1275 stmt = SSA_NAME_DEF_STMT (op);
1276 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1277 || is_gimple_assign (stmt));
1279 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1280 iv->use_id = use->id;
1282 return use;
1285 /* Given a condition in statement STMT, checks whether it is a compare
1286 of an induction variable and an invariant. If this is the case,
1287 CONTROL_VAR is set to location of the iv, BOUND to the location of
1288 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1289 induction variable descriptions, and true is returned. If this is not
1290 the case, CONTROL_VAR and BOUND are set to the arguments of the
1291 condition and false is returned. */
1293 static bool
1294 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1295 tree **control_var, tree **bound,
1296 struct iv **iv_var, struct iv **iv_bound)
1298 /* The objects returned when COND has constant operands. */
1299 static struct iv const_iv;
1300 static tree zero;
1301 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1302 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1303 bool ret = false;
1305 if (gimple_code (stmt) == GIMPLE_COND)
1307 op0 = gimple_cond_lhs_ptr (stmt);
1308 op1 = gimple_cond_rhs_ptr (stmt);
1310 else
1312 op0 = gimple_assign_rhs1_ptr (stmt);
1313 op1 = gimple_assign_rhs2_ptr (stmt);
1316 zero = integer_zero_node;
1317 const_iv.step = integer_zero_node;
1319 if (TREE_CODE (*op0) == SSA_NAME)
1320 iv0 = get_iv (data, *op0);
1321 if (TREE_CODE (*op1) == SSA_NAME)
1322 iv1 = get_iv (data, *op1);
1324 /* Exactly one of the compared values must be an iv, and the other one must
1325 be an invariant. */
1326 if (!iv0 || !iv1)
1327 goto end;
1329 if (integer_zerop (iv0->step))
1331 /* Control variable may be on the other side. */
1332 tmp_op = op0; op0 = op1; op1 = tmp_op;
1333 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1335 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1337 end:
1338 if (control_var)
1339 *control_var = op0;
1340 if (iv_var)
1341 *iv_var = iv0;
1342 if (bound)
1343 *bound = op1;
1344 if (iv_bound)
1345 *iv_bound = iv1;
1347 return ret;
1350 /* Checks whether the condition in STMT is interesting and if so,
1351 records it. */
1353 static void
1354 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1356 tree *var_p, *bound_p;
1357 struct iv *var_iv, *civ;
1359 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1361 find_interesting_uses_op (data, *var_p);
1362 find_interesting_uses_op (data, *bound_p);
1363 return;
1366 civ = XNEW (struct iv);
1367 *civ = *var_iv;
1368 record_use (data, NULL, civ, stmt, USE_COMPARE);
1371 /* Returns true if expression EXPR is obviously invariant in LOOP,
1372 i.e. if all its operands are defined outside of the LOOP. LOOP
1373 should not be the function body. */
1375 bool
1376 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1378 basic_block def_bb;
1379 unsigned i, len;
1381 gcc_assert (loop_depth (loop) > 0);
1383 if (is_gimple_min_invariant (expr))
1384 return true;
1386 if (TREE_CODE (expr) == SSA_NAME)
1388 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1389 if (def_bb
1390 && flow_bb_inside_loop_p (loop, def_bb))
1391 return false;
1393 return true;
1396 if (!EXPR_P (expr))
1397 return false;
1399 len = TREE_OPERAND_LENGTH (expr);
1400 for (i = 0; i < len; i++)
1401 if (TREE_OPERAND (expr, i)
1402 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1403 return false;
1405 return true;
1408 /* Returns true if statement STMT is obviously invariant in LOOP,
1409 i.e. if all its operands on the RHS are defined outside of the LOOP.
1410 LOOP should not be the function body. */
1412 bool
1413 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1415 unsigned i;
1416 tree lhs;
1418 gcc_assert (loop_depth (loop) > 0);
1420 lhs = gimple_get_lhs (stmt);
1421 for (i = 0; i < gimple_num_ops (stmt); i++)
1423 tree op = gimple_op (stmt, i);
1424 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1425 return false;
1428 return true;
1431 /* Accumulates the steps of indices into DATA and replaces their values with the
1432 initial ones. Returns false when the value of the index cannot be determined.
1433 Callback for for_each_index. */
1435 struct ifs_ivopts_data
1437 struct ivopts_data *ivopts_data;
1438 gimple stmt;
1439 tree step;
1442 static bool
1443 idx_find_step (tree base, tree *idx, void *data)
1445 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1446 struct iv *iv;
1447 tree step, iv_base, iv_step, lbound, off;
1448 struct loop *loop = dta->ivopts_data->current_loop;
1450 /* If base is a component ref, require that the offset of the reference
1451 be invariant. */
1452 if (TREE_CODE (base) == COMPONENT_REF)
1454 off = component_ref_field_offset (base);
1455 return expr_invariant_in_loop_p (loop, off);
1458 /* If base is array, first check whether we will be able to move the
1459 reference out of the loop (in order to take its address in strength
1460 reduction). In order for this to work we need both lower bound
1461 and step to be loop invariants. */
1462 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1464 /* Moreover, for a range, the size needs to be invariant as well. */
1465 if (TREE_CODE (base) == ARRAY_RANGE_REF
1466 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1467 return false;
1469 step = array_ref_element_size (base);
1470 lbound = array_ref_low_bound (base);
1472 if (!expr_invariant_in_loop_p (loop, step)
1473 || !expr_invariant_in_loop_p (loop, lbound))
1474 return false;
1477 if (TREE_CODE (*idx) != SSA_NAME)
1478 return true;
1480 iv = get_iv (dta->ivopts_data, *idx);
1481 if (!iv)
1482 return false;
1484 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1485 *&x[0], which is not folded and does not trigger the
1486 ARRAY_REF path below. */
1487 *idx = iv->base;
1489 if (integer_zerop (iv->step))
1490 return true;
1492 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1494 step = array_ref_element_size (base);
1496 /* We only handle addresses whose step is an integer constant. */
1497 if (TREE_CODE (step) != INTEGER_CST)
1498 return false;
1500 else
1501 /* The step for pointer arithmetic is already 1 byte. */
1502 step = size_one_node;
1504 iv_base = iv->base;
1505 iv_step = iv->step;
1506 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1507 sizetype, &iv_base, &iv_step, dta->stmt,
1508 false))
1510 /* The index might wrap. */
1511 return false;
1514 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1515 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1517 return true;
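/* A sketch of the step accumulation above: for a reference a[2 * i]
   with 4-byte elements, where `i' is an iv with step 1, the index is
   an iv with step 2, so 4 * 2 = 8 is added to DTA->step and the index
   itself is replaced by its initial value.  */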
1520 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1521 object is passed to it in DATA. */
1523 static bool
1524 idx_record_use (tree base, tree *idx,
1525 void *vdata)
1527 struct ivopts_data *data = (struct ivopts_data *) vdata;
1528 find_interesting_uses_op (data, *idx);
1529 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1531 find_interesting_uses_op (data, array_ref_element_size (base));
1532 find_interesting_uses_op (data, array_ref_low_bound (base));
1534 return true;
1537 /* If we can prove that TOP = cst * BOT for some constant cst,
1538 store cst to MUL and return true. Otherwise return false.
1539 The returned value is always sign-extended, regardless of the
1540 signedness of TOP and BOT. */
1542 static bool
1543 constant_multiple_of (tree top, tree bot, double_int *mul)
1545 tree mby;
1546 enum tree_code code;
1547 double_int res, p0, p1;
1548 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1550 STRIP_NOPS (top);
1551 STRIP_NOPS (bot);
1553 if (operand_equal_p (top, bot, 0))
1555 *mul = double_int_one;
1556 return true;
1559 code = TREE_CODE (top);
1560 switch (code)
1562 case MULT_EXPR:
1563 mby = TREE_OPERAND (top, 1);
1564 if (TREE_CODE (mby) != INTEGER_CST)
1565 return false;
1567 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1568 return false;
1570 *mul = (res * tree_to_double_int (mby)).sext (precision);
1571 return true;
1573 case PLUS_EXPR:
1574 case MINUS_EXPR:
1575 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1576 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1577 return false;
1579 if (code == MINUS_EXPR)
1580 p1 = -p1;
1581 *mul = (p0 + p1).sext (precision);
1582 return true;
1584 case INTEGER_CST:
1585 if (TREE_CODE (bot) != INTEGER_CST)
1586 return false;
1588 p0 = tree_to_double_int (top).sext (precision);
1589 p1 = tree_to_double_int (bot).sext (precision);
1590 if (p1.is_zero ())
1591 return false;
1592 *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1593 return res.is_zero ();
1595 default:
1596 return false;
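/* Worked examples (illustrative): for TOP = n * 8 + n * 4 and BOT = n,
   *MUL is set to 12 and true is returned; for TOP = 24 and BOT = 8,
   *MUL is set to 3; for TOP = n + 1 and BOT = n, false is returned,
   as no constant multiple exists.  */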
1600 /* Returns true if memory reference REF with step STEP may be unaligned. */
1602 static bool
1603 may_be_unaligned_p (tree ref, tree step)
1605 tree base;
1606 tree base_type;
1607 HOST_WIDE_INT bitsize;
1608 HOST_WIDE_INT bitpos;
1609 tree toffset;
1610 enum machine_mode mode;
1611 int unsignedp, volatilep;
1612 unsigned base_align;
1614 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1615 thus they are not misaligned. */
1616 if (TREE_CODE (ref) == TARGET_MEM_REF)
1617 return false;
1619 /* The test below is basically a copy of what expr.c:normal_inner_ref
1620 does to check whether the object must be loaded by parts when
1621 STRICT_ALIGNMENT is true. */
1622 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1623 &unsignedp, &volatilep, true);
1624 base_type = TREE_TYPE (base);
1625 base_align = get_object_alignment (base);
1626 base_align = MAX (base_align, TYPE_ALIGN (base_type));
1628 if (mode != BLKmode)
1630 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1632 if (base_align < mode_align
1633 || (bitpos % mode_align) != 0
1634 || (bitpos % BITS_PER_UNIT) != 0)
1635 return true;
1637 if (toffset
1638 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1639 return true;
1641 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1642 return true;
1645 return false;
1648 /* Return true if EXPR may be non-addressable. */
1650 bool
1651 may_be_nonaddressable_p (tree expr)
1653 switch (TREE_CODE (expr))
1655 case TARGET_MEM_REF:
1656 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1657 target, thus they are always addressable. */
1658 return false;
1660 case COMPONENT_REF:
1661 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1662 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1664 case VIEW_CONVERT_EXPR:
1665 /* This kind of view-conversion may wrap non-addressable objects
1666 and make them look addressable. After some processing the
1667 non-addressability may be uncovered again, causing ADDR_EXPRs
1668 of inappropriate objects to be built. */
1669 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1670 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1671 return true;
1673 /* ... fall through ... */
1675 case ARRAY_REF:
1676 case ARRAY_RANGE_REF:
1677 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1679 CASE_CONVERT:
1680 return true;
1682 default:
1683 break;
1686 return false;
1689 /* Finds addresses in *OP_P inside STMT. */
1691 static void
1692 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1694 tree base = *op_p, step = size_zero_node;
1695 struct iv *civ;
1696 struct ifs_ivopts_data ifs_ivopts_data;
1698 /* Do not play with volatile memory references. A bit too conservative,
1699 perhaps, but safe. */
1700 if (gimple_has_volatile_ops (stmt))
1701 goto fail;
1703 /* Ignore bitfields for now. Not really something terribly complicated
1704 to handle. TODO. */
1705 if (TREE_CODE (base) == BIT_FIELD_REF)
1706 goto fail;
1708 base = unshare_expr (base);
1710 if (TREE_CODE (base) == TARGET_MEM_REF)
1712 tree type = build_pointer_type (TREE_TYPE (base));
1713 tree astep;
1715 if (TMR_BASE (base)
1716 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1718 civ = get_iv (data, TMR_BASE (base));
1719 if (!civ)
1720 goto fail;
1722 TMR_BASE (base) = civ->base;
1723 step = civ->step;
1725 if (TMR_INDEX2 (base)
1726 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1728 civ = get_iv (data, TMR_INDEX2 (base));
1729 if (!civ)
1730 goto fail;
1732 TMR_INDEX2 (base) = civ->base;
1733 step = civ->step;
1735 if (TMR_INDEX (base)
1736 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1738 civ = get_iv (data, TMR_INDEX (base));
1739 if (!civ)
1740 goto fail;
1742 TMR_INDEX (base) = civ->base;
1743 astep = civ->step;
1745 if (astep)
1747 if (TMR_STEP (base))
1748 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1750 step = fold_build2 (PLUS_EXPR, type, step, astep);
1754 if (integer_zerop (step))
1755 goto fail;
1756 base = tree_mem_ref_addr (type, base);
1758 else
1760 ifs_ivopts_data.ivopts_data = data;
1761 ifs_ivopts_data.stmt = stmt;
1762 ifs_ivopts_data.step = size_zero_node;
1763 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1764 || integer_zerop (ifs_ivopts_data.step))
1765 goto fail;
1766 step = ifs_ivopts_data.step;
1768 /* Check that the base expression is addressable. This needs
1769 to be done after substituting bases of IVs into it. */
1770 if (may_be_nonaddressable_p (base))
1771 goto fail;
1773 /* Moreover, on strict alignment platforms, check that it is
1774 sufficiently aligned. */
1775 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1776 goto fail;
1778 base = build_fold_addr_expr (base);
1780 /* Substituting bases of IVs into the base expression might
1781 have caused folding opportunities. */
1782 if (TREE_CODE (base) == ADDR_EXPR)
1784 tree *ref = &TREE_OPERAND (base, 0);
1785 while (handled_component_p (*ref))
1786 ref = &TREE_OPERAND (*ref, 0);
1787 if (TREE_CODE (*ref) == MEM_REF)
1789 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1790 TREE_OPERAND (*ref, 0),
1791 TREE_OPERAND (*ref, 1));
1792 if (tem)
1793 *ref = tem;
1798 civ = alloc_iv (base, step);
1799 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1800 return;
1802 fail:
1803 for_each_index (op_p, idx_record_use, data);
1806 /* Finds and records invariants used in STMT. */
1808 static void
1809 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1811 ssa_op_iter iter;
1812 use_operand_p use_p;
1813 tree op;
1815 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1817 op = USE_FROM_PTR (use_p);
1818 record_invariant (data, op, false);
1822 /* Finds interesting uses of induction variables in the statement STMT. */
1824 static void
1825 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1827 struct iv *iv;
1828 tree op, *lhs, *rhs;
1829 ssa_op_iter iter;
1830 use_operand_p use_p;
1831 enum tree_code code;
1833 find_invariants_stmt (data, stmt);
1835 if (gimple_code (stmt) == GIMPLE_COND)
1837 find_interesting_uses_cond (data, stmt);
1838 return;
1841 if (is_gimple_assign (stmt))
1843 lhs = gimple_assign_lhs_ptr (stmt);
1844 rhs = gimple_assign_rhs1_ptr (stmt);
1846 if (TREE_CODE (*lhs) == SSA_NAME)
1848 /* If the statement defines an induction variable, the uses are not
1849 interesting by themselves. */
1851 iv = get_iv (data, *lhs);
1853 if (iv && !integer_zerop (iv->step))
1854 return;
1857 code = gimple_assign_rhs_code (stmt);
1858 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1859 && (REFERENCE_CLASS_P (*rhs)
1860 || is_gimple_val (*rhs)))
1862 if (REFERENCE_CLASS_P (*rhs))
1863 find_interesting_uses_address (data, stmt, rhs);
1864 else
1865 find_interesting_uses_op (data, *rhs);
1867 if (REFERENCE_CLASS_P (*lhs))
1868 find_interesting_uses_address (data, stmt, lhs);
1869 return;
1871 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1873 find_interesting_uses_cond (data, stmt);
1874 return;
1877 /* TODO -- we should also handle address uses of type
1879 memory = call (whatever);
1881 and
1883 call (memory). */
1886 if (gimple_code (stmt) == GIMPLE_PHI
1887 && gimple_bb (stmt) == data->current_loop->header)
1889 iv = get_iv (data, PHI_RESULT (stmt));
1891 if (iv && !integer_zerop (iv->step))
1892 return;
1895 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1897 op = USE_FROM_PTR (use_p);
1899 if (TREE_CODE (op) != SSA_NAME)
1900 continue;
1902 iv = get_iv (data, op);
1903 if (!iv)
1904 continue;
1906 find_interesting_uses_op (data, op);
1910 /* Finds interesting uses of induction variables outside of loops
1911 on loop exit edge EXIT. */
1913 static void
1914 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1916 gimple phi;
1917 gimple_stmt_iterator psi;
1918 tree def;
1920 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1922 phi = gsi_stmt (psi);
1923 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1924 if (!virtual_operand_p (def))
1925 find_interesting_uses_op (data, def);
1929 /* Finds uses of the induction variables that are interesting. */
1931 static void
1932 find_interesting_uses (struct ivopts_data *data)
1934 basic_block bb;
1935 gimple_stmt_iterator bsi;
1936 basic_block *body = get_loop_body (data->current_loop);
1937 unsigned i;
1938 struct version_info *info;
1939 edge e;
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1942 fprintf (dump_file, "Uses:\n\n");
1944 for (i = 0; i < data->current_loop->num_nodes; i++)
1946 edge_iterator ei;
1947 bb = body[i];
1949 FOR_EACH_EDGE (e, ei, bb->succs)
1950 if (e->dest != EXIT_BLOCK_PTR
1951 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1952 find_interesting_uses_outside (data, e);
1954 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1955 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1956 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1957 if (!is_gimple_debug (gsi_stmt (bsi)))
1958 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1961 if (dump_file && (dump_flags & TDF_DETAILS))
1963 bitmap_iterator bi;
1965 fprintf (dump_file, "\n");
1967 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1969 info = ver_info (data, i);
1970 if (info->inv_id)
1972 fprintf (dump_file, " ");
1973 print_generic_expr (dump_file, info->name, TDF_SLIM);
1974 fprintf (dump_file, " is invariant (%d)%s\n",
1975 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1979 fprintf (dump_file, "\n");
1982 free (body);
1985 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1986 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1987 we are at the top-level of the processed address. */
1989 static tree
1990 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1991 unsigned HOST_WIDE_INT *offset)
1993 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1994 enum tree_code code;
1995 tree type, orig_type = TREE_TYPE (expr);
1996 unsigned HOST_WIDE_INT off0, off1, st;
1997 tree orig_expr = expr;
1999 STRIP_NOPS (expr);
2001 type = TREE_TYPE (expr);
2002 code = TREE_CODE (expr);
2003 *offset = 0;
2005 switch (code)
2007 case INTEGER_CST:
2008 if (!cst_and_fits_in_hwi (expr)
2009 || integer_zerop (expr))
2010 return orig_expr;
2012 *offset = int_cst_value (expr);
2013 return build_int_cst (orig_type, 0);
2015 case POINTER_PLUS_EXPR:
2016 case PLUS_EXPR:
2017 case MINUS_EXPR:
2018 op0 = TREE_OPERAND (expr, 0);
2019 op1 = TREE_OPERAND (expr, 1);
2021 op0 = strip_offset_1 (op0, false, false, &off0);
2022 op1 = strip_offset_1 (op1, false, false, &off1);
2024 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2025 if (op0 == TREE_OPERAND (expr, 0)
2026 && op1 == TREE_OPERAND (expr, 1))
2027 return orig_expr;
2029 if (integer_zerop (op1))
2030 expr = op0;
2031 else if (integer_zerop (op0))
2033 if (code == MINUS_EXPR)
2034 expr = fold_build1 (NEGATE_EXPR, type, op1);
2035 else
2036 expr = op1;
2038 else
2039 expr = fold_build2 (code, type, op0, op1);
2041 return fold_convert (orig_type, expr);
2043 case MULT_EXPR:
2044 op1 = TREE_OPERAND (expr, 1);
2045 if (!cst_and_fits_in_hwi (op1))
2046 return orig_expr;
2048 op0 = TREE_OPERAND (expr, 0);
2049 op0 = strip_offset_1 (op0, false, false, &off0);
2050 if (op0 == TREE_OPERAND (expr, 0))
2051 return orig_expr;
2053 *offset = off0 * int_cst_value (op1);
2054 if (integer_zerop (op0))
2055 expr = op0;
2056 else
2057 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2059 return fold_convert (orig_type, expr);
2061 case ARRAY_REF:
2062 case ARRAY_RANGE_REF:
2063 if (!inside_addr)
2064 return orig_expr;
2066 step = array_ref_element_size (expr);
2067 if (!cst_and_fits_in_hwi (step))
2068 break;
2070 st = int_cst_value (step);
2071 op1 = TREE_OPERAND (expr, 1);
2072 op1 = strip_offset_1 (op1, false, false, &off1);
2073 *offset = off1 * st;
2075 if (top_compref
2076 && integer_zerop (op1))
2078 /* Strip the component reference completely. */
2079 op0 = TREE_OPERAND (expr, 0);
2080 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2081 *offset += off0;
2082 return op0;
2084 break;
2086 case COMPONENT_REF:
2087 if (!inside_addr)
2088 return orig_expr;
2090 tmp = component_ref_field_offset (expr);
2091 if (top_compref
2092 && cst_and_fits_in_hwi (tmp))
2094 /* Strip the component reference completely. */
2095 op0 = TREE_OPERAND (expr, 0);
2096 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2097 *offset = off0 + int_cst_value (tmp);
2098 return op0;
2100 break;
2102 case ADDR_EXPR:
2103 op0 = TREE_OPERAND (expr, 0);
2104 op0 = strip_offset_1 (op0, true, true, &off0);
2105 *offset += off0;
2107 if (op0 == TREE_OPERAND (expr, 0))
2108 return orig_expr;
2110 expr = build_fold_addr_expr (op0);
2111 return fold_convert (orig_type, expr);
2113 case MEM_REF:
2114 /* ??? Offset operand? */
2115 inside_addr = false;
2116 break;
2118 default:
2119 return orig_expr;
2122 /* Default handling of expressions for which we want to recurse into
2123 the first operand. */
2124 op0 = TREE_OPERAND (expr, 0);
2125 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2126 *offset += off0;
2128 if (op0 == TREE_OPERAND (expr, 0)
2129 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2130 return orig_expr;
2132 expr = copy_node (expr);
2133 TREE_OPERAND (expr, 0) = op0;
2134 if (op1)
2135 TREE_OPERAND (expr, 1) = op1;
2137 /* Inside address, we might strip the top level component references,
2138 thus changing the type of the expression. The handling of ADDR_EXPR
2139 will fix that. */
2140 expr = fold_convert (orig_type, expr);
2142 return expr;
2145 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2147 static tree
2148 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2150 return strip_offset_1 (expr, false, false, offset);
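/* For instance (a sketch): applied to p + 24 this returns P with
   *OFFSET set to 24; applied to &a[i + 3] with 4-byte elements it
   returns &a[i] with *OFFSET set to 12.  */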
2153 /* Returns a variant of TYPE that can be used as a base for different uses.
2154 We return an unsigned type with the same precision, which avoids problems
2155 with overflows. */
2157 static tree
2158 generic_type_for (tree type)
2160 if (POINTER_TYPE_P (type))
2161 return unsigned_type_for (type);
2163 if (TYPE_UNSIGNED (type))
2164 return type;
2166 return unsigned_type_for (type);
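/* E.g. `int' yields `unsigned int', and a pointer type yields the
   unsigned integer type of the same precision, so that arithmetic on
   the candidate wraps instead of invoking undefined overflow.  */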
2169 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2170 the bitmap in which we should store them. */
2172 static struct ivopts_data *fd_ivopts_data;
2173 static tree
2174 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2176 bitmap *depends_on = (bitmap *) data;
2177 struct version_info *info;
2179 if (TREE_CODE (*expr_p) != SSA_NAME)
2180 return NULL_TREE;
2181 info = name_info (fd_ivopts_data, *expr_p);
2183 if (!info->inv_id || info->has_nonlin_use)
2184 return NULL_TREE;
2186 if (!*depends_on)
2187 *depends_on = BITMAP_ALLOC (NULL);
2188 bitmap_set_bit (*depends_on, info->inv_id);
2190 return NULL_TREE;
2193 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2194 position to POS. If USE is not NULL, the candidate is set as related to
2195 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2196 replacement of the final value of the iv by a direct computation. */
2198 static struct iv_cand *
2199 add_candidate_1 (struct ivopts_data *data,
2200 tree base, tree step, bool important, enum iv_position pos,
2201 struct iv_use *use, gimple incremented_at)
2203 unsigned i;
2204 struct iv_cand *cand = NULL;
2205 tree type, orig_type;
2207 /* For non-original variables, make sure their values are computed in a type
2208 that does not invoke undefined behavior on overflows (since in general,
2209 we cannot prove that these induction variables are non-wrapping). */
2210 if (pos != IP_ORIGINAL)
2212 orig_type = TREE_TYPE (base);
2213 type = generic_type_for (orig_type);
2214 if (type != orig_type)
2216 base = fold_convert (type, base);
2217 step = fold_convert (type, step);
2221 for (i = 0; i < n_iv_cands (data); i++)
2223 cand = iv_cand (data, i);
2225 if (cand->pos != pos)
2226 continue;
2228 if (cand->incremented_at != incremented_at
2229 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2230 && cand->ainc_use != use))
2231 continue;
2233 if (!cand->iv)
2235 if (!base && !step)
2236 break;
2238 continue;
2241 if (!base && !step)
2242 continue;
2244 if (operand_equal_p (base, cand->iv->base, 0)
2245 && operand_equal_p (step, cand->iv->step, 0)
2246 && (TYPE_PRECISION (TREE_TYPE (base))
2247 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2248 break;
2251 if (i == n_iv_cands (data))
2253 cand = XCNEW (struct iv_cand);
2254 cand->id = i;
2256 if (!base && !step)
2257 cand->iv = NULL;
2258 else
2259 cand->iv = alloc_iv (base, step);
2261 cand->pos = pos;
2262 if (pos != IP_ORIGINAL && cand->iv)
2264 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2265 cand->var_after = cand->var_before;
2267 cand->important = important;
2268 cand->incremented_at = incremented_at;
2269 data->iv_candidates.safe_push (cand);
2271 if (step
2272 && TREE_CODE (step) != INTEGER_CST)
2274 fd_ivopts_data = data;
2275 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2278 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2279 cand->ainc_use = use;
2280 else
2281 cand->ainc_use = NULL;
2283 if (dump_file && (dump_flags & TDF_DETAILS))
2284 dump_cand (dump_file, cand);
2287 if (important && !cand->important)
2289 cand->important = true;
2290 if (dump_file && (dump_flags & TDF_DETAILS))
2291 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2294 if (use)
2296 bitmap_set_bit (use->related_cands, i);
2297 if (dump_file && (dump_flags & TDF_DETAILS))
2298 fprintf (dump_file, "Candidate %d is related to use %d\n",
2299 cand->id, use->id);
2302 return cand;
2305 /* Returns true if incrementing the induction variable at the end of the LOOP
2306 is allowed.
2308 The purpose is to avoid splitting the latch edge with a biv increment, thus
2309 creating a jump, possibly confusing other optimization passes and leaving
2310 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2311 is not available (so we do not have a better alternative), or if the latch
2312 edge is already nonempty. */
2314 static bool
2315 allow_ip_end_pos_p (struct loop *loop)
2317 if (!ip_normal_pos (loop))
2318 return true;
2320 if (!empty_block_p (ip_end_pos (loop)))
2321 return true;
2323 return false;
2326 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2327 Important field is set to IMPORTANT. */
2329 static void
2330 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2331 bool important, struct iv_use *use)
2333 basic_block use_bb = gimple_bb (use->stmt);
2334 enum machine_mode mem_mode;
2335 unsigned HOST_WIDE_INT cstepi;
2337 /* If we insert the increment in any position other than the standard
2338 ones, we must ensure that it is incremented once per iteration.
2339 It must not be in an inner nested loop, or one side of an if
2340 statement. */
2341 if (use_bb->loop_father != data->current_loop
2342 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2343 || stmt_could_throw_p (use->stmt)
2344 || !cst_and_fits_in_hwi (step))
2345 return;
2347 cstepi = int_cst_value (step);
2349 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2350 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2351 || USE_STORE_PRE_INCREMENT (mem_mode))
2352 && GET_MODE_SIZE (mem_mode) == cstepi)
2353 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2354 || USE_STORE_PRE_DECREMENT (mem_mode))
2355 && GET_MODE_SIZE (mem_mode) == -cstepi))
2357 enum tree_code code = MINUS_EXPR;
2358 tree new_base;
2359 tree new_step = step;
2361 if (POINTER_TYPE_P (TREE_TYPE (base)))
2363 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2364 code = POINTER_PLUS_EXPR;
2366 else
2367 new_step = fold_convert (TREE_TYPE (base), new_step);
2368 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2369 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2370 use->stmt);
2372 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2373 || USE_STORE_POST_INCREMENT (mem_mode))
2374 && GET_MODE_SIZE (mem_mode) == cstepi)
2375 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2376 || USE_STORE_POST_DECREMENT (mem_mode))
2377 && GET_MODE_SIZE (mem_mode) == -cstepi))
2379 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2380 use->stmt);
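/* For instance (a sketch; whether this fires is target-dependent): on
   a target with post-increment addressing, a 4-byte load from an iv
   with step 4 satisfies the USE_LOAD_POST_INCREMENT test above, so we
   add an IP_AFTER_USE candidate that lets the use be expressed as a
   "*p++" style access with no separate increment instruction.  */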
2384 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
2385 If USE is not NULL, the candidate is set as related to it. The candidate
2386 computation is scheduled on all available positions. */
2388 static void
2389 add_candidate (struct ivopts_data *data,
2390 tree base, tree step, bool important, struct iv_use *use)
2392 if (ip_normal_pos (data->current_loop))
2393 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2394 if (ip_end_pos (data->current_loop)
2395 && allow_ip_end_pos_p (data->current_loop))
2396 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2398 if (use != NULL && use->type == USE_ADDRESS)
2399 add_autoinc_candidates (data, base, step, important, use);
2402 /* Adds standard iv candidates. */
2404 static void
2405 add_standard_iv_candidates (struct ivopts_data *data)
2407 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2409 /* The same for the 'long' type, if it is wider than 'int' and still fast enough. */
2410 if (TYPE_PRECISION
2411 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2412 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2413 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2414 build_int_cst (long_integer_type_node, 1), true, NULL);
2416 /* And likewise for the 'long long' type, if it is wider than 'long' and still fast enough. */
2417 if (TYPE_PRECISION
2418 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2419 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2420 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2421 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2425 /* Adds candidates based on the old induction variable IV. */
2427 static void
2428 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2430 gimple phi;
2431 tree def;
2432 struct iv_cand *cand;
2434 add_candidate (data, iv->base, iv->step, true, NULL);
2436 /* The same, but with initial value zero. */
2437 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2438 add_candidate (data, size_int (0), iv->step, true, NULL);
2439 else
2440 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2441 iv->step, true, NULL);
2443 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2444 if (gimple_code (phi) == GIMPLE_PHI)
2446 /* Additionally record the possibility of leaving the original iv
2447 untouched. */
2448 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2449 cand = add_candidate_1 (data,
2450 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2451 SSA_NAME_DEF_STMT (def));
2452 cand->var_before = iv->ssa_name;
2453 cand->var_after = def;
2457 /* Adds candidates based on the old induction variables. */
2459 static void
2460 add_old_ivs_candidates (struct ivopts_data *data)
2462 unsigned i;
2463 struct iv *iv;
2464 bitmap_iterator bi;
2466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2468 iv = ver_info (data, i)->iv;
2469 if (iv && iv->biv_p && !integer_zerop (iv->step))
2470 add_old_iv_candidates (data, iv);
2474 /* Adds candidates based on the value of the induction variable IV and USE. */
2476 static void
2477 add_iv_value_candidates (struct ivopts_data *data,
2478 struct iv *iv, struct iv_use *use)
2480 unsigned HOST_WIDE_INT offset;
2481 tree base;
2482 tree basetype;
2484 add_candidate (data, iv->base, iv->step, false, use);
2486 /* The same, but with initial value zero. Make such a variable important,
2487 since it is generic enough that possibly many uses may be based
2488 on it. */
2489 basetype = TREE_TYPE (iv->base);
2490 if (POINTER_TYPE_P (basetype))
2491 basetype = sizetype;
2492 add_candidate (data, build_int_cst (basetype, 0),
2493 iv->step, true, use);
2495 /* Third, try removing the constant offset. Make sure to even
2496 add a candidate for &a[0] vs. (T *)&a. */
2497 base = strip_offset (iv->base, &offset);
2498 if (offset
2499 || base != iv->base)
2500 add_candidate (data, base, iv->step, false, use);
2503 /* Adds candidates based on the uses. */
2505 static void
2506 add_derived_ivs_candidates (struct ivopts_data *data)
2508 unsigned i;
2510 for (i = 0; i < n_iv_uses (data); i++)
2512 struct iv_use *use = iv_use (data, i);
2514 if (!use)
2515 continue;
2517 switch (use->type)
2519 case USE_NONLINEAR_EXPR:
2520 case USE_COMPARE:
2521 case USE_ADDRESS:
2522 /* Just add the ivs based on the value of the iv used here. */
2523 add_iv_value_candidates (data, use->iv, use);
2524 break;
2526 default:
2527 gcc_unreachable ();
2532 /* Record important candidates and add them to related_cands bitmaps
2533 if needed. */
2535 static void
2536 record_important_candidates (struct ivopts_data *data)
2538 unsigned i;
2539 struct iv_use *use;
2541 for (i = 0; i < n_iv_cands (data); i++)
2543 struct iv_cand *cand = iv_cand (data, i);
2545 if (cand->important)
2546 bitmap_set_bit (data->important_candidates, i);
2549 data->consider_all_candidates = (n_iv_cands (data)
2550 <= CONSIDER_ALL_CANDIDATES_BOUND);
2552 if (data->consider_all_candidates)
2554 /* We will not need "related_cands" bitmaps in this case,
2555 so release them to decrease peak memory consumption. */
2556 for (i = 0; i < n_iv_uses (data); i++)
2558 use = iv_use (data, i);
2559 BITMAP_FREE (use->related_cands);
2562 else
2564 /* Add important candidates to the related_cands bitmaps. */
2565 for (i = 0; i < n_iv_uses (data); i++)
2566 bitmap_ior_into (iv_use (data, i)->related_cands,
2567 data->important_candidates);
2571 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2572 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2573 we allocate a small hash table for every use. */
2575 static void
2576 alloc_use_cost_map (struct ivopts_data *data)
2578 unsigned i, size, s, j;
2580 for (i = 0; i < n_iv_uses (data); i++)
2582 struct iv_use *use = iv_use (data, i);
2583 bitmap_iterator bi;
2585 if (data->consider_all_candidates)
2586 size = n_iv_cands (data);
2587 else
2589 s = 0;
2590 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2592 s++;
2595 /* Round up to the nearest power of two, so that taking the modulus by it is fast. */
2596 for (size = 1; size < s; size <<= 1)
2597 continue;
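/* E.g. s == 5 related candidates yield size == 8, so the lookups in
   set_use_iv_cost and get_use_iv_cost can use the cheap masking
   "cand->id & (size - 1)" instead of a division.  */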
2600 use->n_map_members = size;
2601 use->cost_map = XCNEWVEC (struct cost_pair, size);
2605 /* Returns a description of the computation cost of an expression whose
2606 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2608 static comp_cost
2609 new_cost (unsigned runtime, unsigned complexity)
2611 comp_cost cost;
2613 cost.cost = runtime;
2614 cost.complexity = complexity;
2616 return cost;
2619 /* Adds costs COST1 and COST2. */
2621 static comp_cost
2622 add_costs (comp_cost cost1, comp_cost cost2)
2624 cost1.cost += cost2.cost;
2625 cost1.complexity += cost2.complexity;
2627 return cost1;
2629 /* Subtracts cost COST2 from COST1. */
2631 static comp_cost
2632 sub_costs (comp_cost cost1, comp_cost cost2)
2634 cost1.cost -= cost2.cost;
2635 cost1.complexity -= cost2.complexity;
2637 return cost1;
2640 /* Returns a negative number if COST1 < COST2, a positive number if
2641 COST1 > COST2, and 0 if COST1 = COST2. */
2643 static int
2644 compare_costs (comp_cost cost1, comp_cost cost2)
2646 if (cost1.cost == cost2.cost)
2647 return cost1.complexity - cost2.complexity;
2649 return cost1.cost - cost2.cost;
2652 /* Returns true if COST is infinite. */
2654 static bool
2655 infinite_cost_p (comp_cost cost)
2657 return cost.cost == INFTY;
2660 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2661 depends on invariants DEPENDS_ON, that the value used in expressing it
2662 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2664 static void
2665 set_use_iv_cost (struct ivopts_data *data,
2666 struct iv_use *use, struct iv_cand *cand,
2667 comp_cost cost, bitmap depends_on, tree value,
2668 enum tree_code comp, int inv_expr_id)
2670 unsigned i, s;
2672 if (infinite_cost_p (cost))
2674 BITMAP_FREE (depends_on);
2675 return;
2678 if (data->consider_all_candidates)
2680 use->cost_map[cand->id].cand = cand;
2681 use->cost_map[cand->id].cost = cost;
2682 use->cost_map[cand->id].depends_on = depends_on;
2683 use->cost_map[cand->id].value = value;
2684 use->cost_map[cand->id].comp = comp;
2685 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2686 return;
2689 /* n_map_members is a power of two, so this computes modulo. */
2690 s = cand->id & (use->n_map_members - 1);
2691 for (i = s; i < use->n_map_members; i++)
2692 if (!use->cost_map[i].cand)
2693 goto found;
2694 for (i = 0; i < s; i++)
2695 if (!use->cost_map[i].cand)
2696 goto found;
2698 gcc_unreachable ();
2700 found:
2701 use->cost_map[i].cand = cand;
2702 use->cost_map[i].cost = cost;
2703 use->cost_map[i].depends_on = depends_on;
2704 use->cost_map[i].value = value;
2705 use->cost_map[i].comp = comp;
2706 use->cost_map[i].inv_expr_id = inv_expr_id;
2709 /* Gets the cost of the (USE, CANDIDATE) pair. */
2711 static struct cost_pair *
2712 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2713 struct iv_cand *cand)
2715 unsigned i, s;
2716 struct cost_pair *ret;
2718 if (!cand)
2719 return NULL;
2721 if (data->consider_all_candidates)
2723 ret = use->cost_map + cand->id;
2724 if (!ret->cand)
2725 return NULL;
2727 return ret;
2730 /* n_map_members is a power of two, so this computes modulo. */
2731 s = cand->id & (use->n_map_members - 1);
2732 for (i = s; i < use->n_map_members; i++)
2733 if (use->cost_map[i].cand == cand)
2734 return use->cost_map + i;
2736 for (i = 0; i < s; i++)
2737 if (use->cost_map[i].cand == cand)
2738 return use->cost_map + i;
2740 return NULL;
2743 /* Returns an estimate of the cost of computing SEQ. */
2745 static unsigned
2746 seq_cost (rtx seq, bool speed)
2748 unsigned cost = 0;
2749 rtx set;
2751 for (; seq; seq = NEXT_INSN (seq))
2753 set = single_set (seq);
2754 if (set)
2755 cost += set_src_cost (SET_SRC (set), speed);
2756 else
2757 cost++;
2760 return cost;
2763 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2764 static rtx
2765 produce_memory_decl_rtl (tree obj, int *regno)
2767 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2768 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2769 rtx x;
2771 gcc_assert (obj);
2772 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2774 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2775 x = gen_rtx_SYMBOL_REF (address_mode, name);
2776 SET_SYMBOL_REF_DECL (x, obj);
2777 x = gen_rtx_MEM (DECL_MODE (obj), x);
2778 set_mem_addr_space (x, as);
2779 targetm.encode_section_info (obj, x, true);
2781 else
2783 x = gen_raw_REG (address_mode, (*regno)++);
2784 x = gen_rtx_MEM (DECL_MODE (obj), x);
2785 set_mem_addr_space (x, as);
2788 return x;
2791 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2792 walk_tree. DATA contains the actual fake register number. */
2794 static tree
2795 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2797 tree obj = NULL_TREE;
2798 rtx x = NULL_RTX;
2799 int *regno = (int *) data;
2801 switch (TREE_CODE (*expr_p))
2803 case ADDR_EXPR:
2804 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2805 handled_component_p (*expr_p);
2806 expr_p = &TREE_OPERAND (*expr_p, 0))
2807 continue;
2808 obj = *expr_p;
2809 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2810 x = produce_memory_decl_rtl (obj, regno);
2811 break;
2813 case SSA_NAME:
2814 *ws = 0;
2815 obj = SSA_NAME_VAR (*expr_p);
2816 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2817 if (!obj)
2818 return NULL_TREE;
2819 if (!DECL_RTL_SET_P (obj))
2820 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2821 break;
2823 case VAR_DECL:
2824 case PARM_DECL:
2825 case RESULT_DECL:
2826 *ws = 0;
2827 obj = *expr_p;
2829 if (DECL_RTL_SET_P (obj))
2830 break;
2832 if (DECL_MODE (obj) == BLKmode)
2833 x = produce_memory_decl_rtl (obj, regno);
2834 else
2835 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2837 break;
2839 default:
2840 break;
2843 if (x)
2845 decl_rtl_to_reset.safe_push (obj);
2846 SET_DECL_RTL (obj, x);
2849 return NULL_TREE;
2852 /* Determines cost of the computation of EXPR. */
2854 static unsigned
2855 computation_cost (tree expr, bool speed)
2857 rtx seq, rslt;
2858 tree type = TREE_TYPE (expr);
2859 unsigned cost;
2860 /* Avoid using hard regs in ways which may be unsupported. */
2861 int regno = LAST_VIRTUAL_REGISTER + 1;
2862 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2863 enum node_frequency real_frequency = node->frequency;
2865 node->frequency = NODE_FREQUENCY_NORMAL;
2866 crtl->maybe_hot_insn_p = speed;
2867 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2868 start_sequence ();
2869 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2870 seq = get_insns ();
2871 end_sequence ();
2872 default_rtl_profile ();
2873 node->frequency = real_frequency;
2875 cost = seq_cost (seq, speed);
2876 if (MEM_P (rslt))
2877 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2878 TYPE_ADDR_SPACE (type), speed);
2879 else if (!REG_P (rslt))
2880 cost += set_src_cost (rslt, speed);
2882 return cost;
2885 /* Returns the variable containing the value of candidate CAND at statement AT. */
2887 static tree
2888 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2890 if (stmt_after_increment (loop, cand, stmt))
2891 return cand->var_after;
2892 else
2893 return cand->var_before;
2896 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2897 same precision that is at least as wide as the precision of TYPE, stores
2898 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2899 type of A and B. */
2901 static tree
2902 determine_common_wider_type (tree *a, tree *b)
2904 tree wider_type = NULL;
2905 tree suba, subb;
2906 tree atype = TREE_TYPE (*a);
2908 if (CONVERT_EXPR_P (*a))
2910 suba = TREE_OPERAND (*a, 0);
2911 wider_type = TREE_TYPE (suba);
2912 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2913 return atype;
2915 else
2916 return atype;
2918 if (CONVERT_EXPR_P (*b))
2920 subb = TREE_OPERAND (*b, 0);
2921 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2922 return atype;
2924 else
2925 return atype;
2927 *a = suba;
2928 *b = subb;
2929 return wider_type;
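/* For instance (a sketch): if A is (unsigned int) la and B is
   (unsigned int) lb, with 'la' and 'lb' of type 'long', both operands
   are replaced by their 'long' inner operands and 'long' is returned,
   so the difference below can be folded in the wider type.  */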
2932 /* Determines the expression by which USE is expressed from induction variable
2933 CAND at statement AT in LOOP. The expression is stored in a decomposed
2934 form into AFF. Returns false if USE cannot be expressed using CAND. */
2936 static bool
2937 get_computation_aff (struct loop *loop,
2938 struct iv_use *use, struct iv_cand *cand, gimple at,
2939 struct affine_tree_combination *aff)
2941 tree ubase = use->iv->base;
2942 tree ustep = use->iv->step;
2943 tree cbase = cand->iv->base;
2944 tree cstep = cand->iv->step, cstep_common;
2945 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2946 tree common_type, var;
2947 tree uutype;
2948 aff_tree cbase_aff, var_aff;
2949 double_int rat;
2951 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2953 /* We do not have enough precision to express the values of the use. */
2954 return false;
2957 var = var_at_stmt (loop, cand, at);
2958 uutype = unsigned_type_for (utype);
2960 /* If the conversion is not a noop, perform it. */
2961 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2963 cstep = fold_convert (uutype, cstep);
2964 cbase = fold_convert (uutype, cbase);
2965 var = fold_convert (uutype, var);
2968 if (!constant_multiple_of (ustep, cstep, &rat))
2969 return false;
2971 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2972 type, we achieve better folding by computing their difference in this
2973 wider type, and casting the result to UUTYPE. We do not need to worry about
2974 overflows, as all the arithmetic will in the end be performed in UUTYPE
2975 anyway. */
2976 common_type = determine_common_wider_type (&ubase, &cbase);
2978 /* use = ubase - ratio * cbase + ratio * var. */
2979 tree_to_aff_combination (ubase, common_type, aff);
2980 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2981 tree_to_aff_combination (var, uutype, &var_aff);
2983 /* We need to shift the value if we are after the increment. */
2984 if (stmt_after_increment (loop, cand, at))
2986 aff_tree cstep_aff;
2988 if (common_type != uutype)
2989 cstep_common = fold_convert (common_type, cstep);
2990 else
2991 cstep_common = cstep;
2993 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2994 aff_combination_add (&cbase_aff, &cstep_aff);
2997 aff_combination_scale (&cbase_aff, -rat);
2998 aff_combination_add (aff, &cbase_aff);
2999 if (common_type != uutype)
3000 aff_combination_convert (aff, uutype);
3002 aff_combination_scale (&var_aff, rat);
3003 aff_combination_add (aff, &var_aff);
3005 return true;
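/* A worked example (illustrative): for a use with iv {base 16, step 4}
   and a candidate with iv {base 0, step 1}, constant_multiple_of
   yields rat == 4, so the use becomes 16 - 4 * 0 + 4 * var, i.e. the
   affine combination 4 * var + 16.  */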
3008 /* Return the type of USE. */
3010 static tree
3011 get_use_type (struct iv_use *use)
3013 tree base_type = TREE_TYPE (use->iv->base);
3014 tree type;
3016 if (use->type == USE_ADDRESS)
3018 /* The base_type may be a void pointer. Create a pointer type based on
3019 the mem_ref instead. */
3020 type = build_pointer_type (TREE_TYPE (*use->op_p));
3021 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3022 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3024 else
3025 type = base_type;
3027 return type;
3030 /* Determines the expression by which USE is expressed from induction variable
3031 CAND at statement AT in LOOP. The computation is unshared. */
3033 static tree
3034 get_computation_at (struct loop *loop,
3035 struct iv_use *use, struct iv_cand *cand, gimple at)
3037 aff_tree aff;
3038 tree type = get_use_type (use);
3040 if (!get_computation_aff (loop, use, cand, at, &aff))
3041 return NULL_TREE;
3042 unshare_aff_combination (&aff);
3043 return fold_convert (type, aff_combination_to_tree (&aff));
3046 /* Determines the expression by which USE is expressed from induction variable
3047 CAND in LOOP. The computation is unshared. */
3049 static tree
3050 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3052 return get_computation_at (loop, use, cand, use->stmt);
3055 /* Adjust the cost COST for being in loop setup rather than loop body.
3056 If we're optimizing for space, the loop setup overhead is constant;
3057 if we're optimizing for speed, amortize it over the per-iteration cost. */
3058 static unsigned
3059 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3061 if (cost == INFTY)
3062 return cost;
3063 else if (optimize_loop_for_speed_p (data->current_loop))
3064 return cost / avg_loop_niter (data->current_loop);
3065 else
3066 return cost;
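/* A worked example (illustrative numbers): with a setup cost of 40 and
   an average trip count of 10, optimizing for speed amortizes the cost
   to 40 / 10 == 4 per iteration, while optimizing for size keeps the
   full 40, since the setup code is emitted exactly once.  */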
3069 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3070 validity for a memory reference accessing memory of mode MODE in
3071 address space AS. */
3074 bool
3075 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3076 addr_space_t as)
3078 #define MAX_RATIO 128
3079 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3080 static vec<sbitmap> valid_mult_list;
3081 sbitmap valid_mult;
3083 if (data_index >= valid_mult_list.length ())
3084 valid_mult_list.safe_grow_cleared (data_index + 1);
3086 valid_mult = valid_mult_list[data_index];
3087 if (!valid_mult)
3089 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3090 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3091 rtx addr;
3092 HOST_WIDE_INT i;
3094 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3095 bitmap_clear (valid_mult);
3096 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3097 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3099 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3100 if (memory_address_addr_space_p (mode, addr, as))
3101 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3104 if (dump_file && (dump_flags & TDF_DETAILS))
3106 fprintf (dump_file, " allowed multipliers:");
3107 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3108 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3109 fprintf (dump_file, " %d", (int) i);
3110 fprintf (dump_file, "\n");
3111 fprintf (dump_file, "\n");
3114 valid_mult_list[data_index] = valid_mult;
3117 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3118 return false;
3120 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
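/* As an illustration (target-dependent, so only a sketch): on
   i386-like targets the scaled-index addressing modes typically make
   the ratios 1, 2, 4 and 8 valid, so a ratio of 4 would be accepted
   here while a ratio of 3 would be rejected.  */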
3123 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3124 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3125 the variable is omitted. Computes the cost for a memory reference that accesses
3126 a memory location of mode MEM_MODE in address space AS.
3128 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3129 size of MEM_MODE / RATIO) is available. To make this determination, we
3130 look at the size of the increment to be made, which is given in CSTEP.
3131 CSTEP may be zero if the step is unknown.
3132 STMT_AFTER_INC is true iff the statement we're looking at is after the
3133 increment of the original biv.
3135 TODO -- there must be some better way. This all is quite crude. */
3137 typedef struct address_cost_data_s
3139 HOST_WIDE_INT min_offset, max_offset;
3140 unsigned costs[2][2][2][2];
3141 } *address_cost_data;
3144 static comp_cost
3145 get_address_cost (bool symbol_present, bool var_present,
3146 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3147 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3148 addr_space_t as, bool speed,
3149 bool stmt_after_inc, bool *may_autoinc)
3151 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3152 static vec<address_cost_data> address_cost_data_list;
3153 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3154 address_cost_data data;
3155 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3156 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3157 unsigned cost, acost, complexity;
3158 bool offset_p, ratio_p, autoinc;
3159 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3160 unsigned HOST_WIDE_INT mask;
3161 unsigned bits;
3163 if (data_index >= address_cost_data_list.length ())
3164 address_cost_data_list.safe_grow_cleared (data_index + 1);
3166 data = address_cost_data_list[data_index];
3167 if (!data)
3169 HOST_WIDE_INT i;
3170 HOST_WIDE_INT rat, off = 0;
3171 int old_cse_not_expected, width;
3172 unsigned sym_p, var_p, off_p, rat_p, add_c;
3173 rtx seq, addr, base;
3174 rtx reg0, reg1;
3176 data = (address_cost_data) xcalloc (1, sizeof (*data));
3178 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3180 width = GET_MODE_BITSIZE (address_mode) - 1;
3181 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3182 width = HOST_BITS_PER_WIDE_INT - 1;
3183 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3185 for (i = width; i >= 0; i--)
3187 off = -((unsigned HOST_WIDE_INT) 1 << i);
3188 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3189 if (memory_address_addr_space_p (mem_mode, addr, as))
3190 break;
3192 data->min_offset = (i == -1 ? 0 : off);
3194 for (i = width; i >= 0; i--)
3196 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3197 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3198 if (memory_address_addr_space_p (mem_mode, addr, as))
3199 break;
3201 if (i == -1)
3202 off = 0;
3203 data->max_offset = off;
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3207 fprintf (dump_file, "get_address_cost:\n");
3208 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3209 GET_MODE_NAME (mem_mode),
3210 data->min_offset);
3211 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3212 GET_MODE_NAME (mem_mode),
3213 data->max_offset);
3216 rat = 1;
3217 for (i = 2; i <= MAX_RATIO; i++)
3218 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3220 rat = i;
3221 break;
3224 /* Compute the cost of various addressing modes. */
3225 acost = 0;
3226 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3227 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3229 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3230 || USE_STORE_PRE_DECREMENT (mem_mode))
3232 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3233 has_predec[mem_mode]
3234 = memory_address_addr_space_p (mem_mode, addr, as);
3236 if (USE_LOAD_POST_DECREMENT (mem_mode)
3237 || USE_STORE_POST_DECREMENT (mem_mode))
3239 addr = gen_rtx_POST_DEC (address_mode, reg0);
3240 has_postdec[mem_mode]
3241 = memory_address_addr_space_p (mem_mode, addr, as);
3243 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3244 || USE_STORE_PRE_INCREMENT (mem_mode))
3246 addr = gen_rtx_PRE_INC (address_mode, reg0);
3247 has_preinc[mem_mode]
3248 = memory_address_addr_space_p (mem_mode, addr, as);
3250 if (USE_LOAD_POST_INCREMENT (mem_mode)
3251 || USE_STORE_POST_INCREMENT (mem_mode))
3253 addr = gen_rtx_POST_INC (address_mode, reg0);
3254 has_postinc[mem_mode]
3255 = memory_address_addr_space_p (mem_mode, addr, as);
3257 for (i = 0; i < 16; i++)
3259 sym_p = i & 1;
3260 var_p = (i >> 1) & 1;
3261 off_p = (i >> 2) & 1;
3262 rat_p = (i >> 3) & 1;
3264 addr = reg0;
3265 if (rat_p)
3266 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3267 gen_int_mode (rat, address_mode));
3269 if (var_p)
3270 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3272 if (sym_p)
3274 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3275 /* ??? We can run into trouble with some backends by presenting
3276 it with symbols which haven't been properly passed through
3277 targetm.encode_section_info. By setting the local bit, we
3278 enhance the probability of things working. */
3279 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3281 if (off_p)
3282 base = gen_rtx_fmt_e (CONST, address_mode,
3283 gen_rtx_fmt_ee
3284 (PLUS, address_mode, base,
3285 gen_int_mode (off, address_mode)));
3287 else if (off_p)
3288 base = gen_int_mode (off, address_mode);
3289 else
3290 base = NULL_RTX;
3292 if (base)
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3295 start_sequence ();
3296 /* To avoid splitting addressing modes, pretend that no cse will
3297 follow. */
3298 old_cse_not_expected = cse_not_expected;
3299 cse_not_expected = true;
3300 addr = memory_address_addr_space (mem_mode, addr, as);
3301 cse_not_expected = old_cse_not_expected;
3302 seq = get_insns ();
3303 end_sequence ();
3305 acost = seq_cost (seq, speed);
3306 acost += address_cost (addr, mem_mode, as, speed);
3308 if (!acost)
3309 acost = 1;
3310 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3313 /* On some targets, it is quite expensive to load a symbol into a register,
3314 which makes addresses that contain symbols look much more expensive.
3315 However, the symbol will have to be loaded in any case before the
3316 loop (and quite likely we have it in a register already), so it does not
3317 make much sense to penalize them too heavily. So make some final
3318 tweaks for the SYMBOL_PRESENT modes:
3320 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3321 var is cheaper, use this mode with a small penalty.
3322 If VAR_PRESENT is true, check whether the mode with
3323 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3324 if this is the case, use it. */
3325 add_c = add_cost (speed, address_mode);
3326 for (i = 0; i < 8; i++)
3328 var_p = i & 1;
3329 off_p = (i >> 1) & 1;
3330 rat_p = (i >> 2) & 1;
3332 acost = data->costs[0][1][off_p][rat_p] + 1;
3333 if (var_p)
3334 acost += add_c;
3336 if (acost < data->costs[1][var_p][off_p][rat_p])
3337 data->costs[1][var_p][off_p][rat_p] = acost;
3340 if (dump_file && (dump_flags & TDF_DETAILS))
3342 fprintf (dump_file, "Address costs:\n");
3344 for (i = 0; i < 16; i++)
3346 sym_p = i & 1;
3347 var_p = (i >> 1) & 1;
3348 off_p = (i >> 2) & 1;
3349 rat_p = (i >> 3) & 1;
3351 fprintf (dump_file, " ");
3352 if (sym_p)
3353 fprintf (dump_file, "sym + ");
3354 if (var_p)
3355 fprintf (dump_file, "var + ");
3356 if (off_p)
3357 fprintf (dump_file, "cst + ");
3358 if (rat_p)
3359 fprintf (dump_file, "rat * ");
3361 acost = data->costs[sym_p][var_p][off_p][rat_p];
3362 fprintf (dump_file, "index costs %d\n", acost);
3364 if (has_predec[mem_mode] || has_postdec[mem_mode]
3365 || has_preinc[mem_mode] || has_postinc[mem_mode])
3366 fprintf (dump_file, " May include autoinc/dec\n");
3367 fprintf (dump_file, "\n");
3370 address_cost_data_list[data_index] = data;
3373 bits = GET_MODE_BITSIZE (address_mode);
3374 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3375 offset &= mask;
3376 if ((offset >> (bits - 1) & 1))
3377 offset |= ~mask;
3378 s_offset = offset;
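/* The lines above sign-extend OFFSET from the width of the address
   mode.  E.g. (a sketch) for a 32-bit address mode, bits == 32 and
   mask == 0xffffffff; an offset of 0xfffffff0 has bit 31 set, so
   s_offset becomes the signed value -16.  */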
3380 autoinc = false;
3381 msize = GET_MODE_SIZE (mem_mode);
3382 autoinc_offset = offset;
3383 if (stmt_after_inc)
3384 autoinc_offset += ratio * cstep;
3385 if (symbol_present || var_present || ratio != 1)
3386 autoinc = false;
3387 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3388 && msize == cstep)
3389 || (has_postdec[mem_mode] && autoinc_offset == 0
3390 && msize == -cstep)
3391 || (has_preinc[mem_mode] && autoinc_offset == msize
3392 && msize == cstep)
3393 || (has_predec[mem_mode] && autoinc_offset == -msize
3394 && msize == -cstep))
3395 autoinc = true;
3397 cost = 0;
3398 offset_p = (s_offset != 0
3399 && data->min_offset <= s_offset
3400 && s_offset <= data->max_offset);
3401 ratio_p = (ratio != 1
3402 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3404 if (ratio != 1 && !ratio_p)
3405 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3407 if (s_offset && !offset_p && !symbol_present)
3408 cost += add_cost (speed, address_mode);
3410 if (may_autoinc)
3411 *may_autoinc = autoinc;
3412 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3413 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3414 return new_cost (cost + acost, complexity);
3417 /* Calculate the speed or size cost (according to SPEED) of shiftadd EXPR in
3418 MODE. MULT is the EXPR operand holding the multiplication by a power of
3419 two, i.e. the shift. COST0 and COST1 are the costs for calculating the
3420 operands of EXPR. Returns true if successful, and returns the cost in COST. */
3422 static bool
3423 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3424 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3426 comp_cost res;
3427 tree op1 = TREE_OPERAND (expr, 1);
3428 tree cst = TREE_OPERAND (mult, 1);
3429 tree multop = TREE_OPERAND (mult, 0);
3430 int m = exact_log2 (int_cst_value (cst));
3431 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3432 int sa_cost;
3434 if (!(m >= 0 && m < maxm))
3435 return false;
3437 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3438 ? shiftadd_cost (speed, mode, m)
3439 : (mult == op1
3440 ? shiftsub1_cost (speed, mode, m)
3441 : shiftsub0_cost (speed, mode, m)));
3442 res = new_cost (sa_cost, 0);
3443 res = add_costs (res, mult == op1 ? cost0 : cost1);
3445 STRIP_NOPS (multop);
3446 if (!is_gimple_val (multop))
3447 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3449 *cost = res;
3450 return true;
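/* For example (illustrative): for EXPR == a + b * 4 the MULT operand
   is b * 4, m == exact_log2 (4) == 2, and the whole expression can be
   carried out as a single shift-add "(b << 2) + a", so it is priced
   with shiftadd_cost rather than a full multiply plus an add.  */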
3453 /* Estimates cost of forcing expression EXPR into a variable. */
3455 static comp_cost
3456 force_expr_to_var_cost (tree expr, bool speed)
3458 static bool costs_initialized = false;
3459 static unsigned integer_cost [2];
3460 static unsigned symbol_cost [2];
3461 static unsigned address_cost [2];
3462 tree op0, op1;
3463 comp_cost cost0, cost1, cost;
3464 enum machine_mode mode;
3466 if (!costs_initialized)
3468 tree type = build_pointer_type (integer_type_node);
3469 tree var, addr;
3470 rtx x;
3471 int i;
3473 var = create_tmp_var_raw (integer_type_node, "test_var");
3474 TREE_STATIC (var) = 1;
3475 x = produce_memory_decl_rtl (var, NULL);
3476 SET_DECL_RTL (var, x);
3478 addr = build1 (ADDR_EXPR, type, var);
3481 for (i = 0; i < 2; i++)
3483 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3484 2000), i);
3486 symbol_cost[i] = computation_cost (addr, i) + 1;
3488 address_cost[i]
3489 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3490 if (dump_file && (dump_flags & TDF_DETAILS))
3492 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3493 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3494 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3495 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3496 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3497 fprintf (dump_file, "\n");
3501 costs_initialized = true;
3504 STRIP_NOPS (expr);
3506 if (SSA_VAR_P (expr))
3507 return no_cost;
3509 if (is_gimple_min_invariant (expr))
3511 if (TREE_CODE (expr) == INTEGER_CST)
3512 return new_cost (integer_cost [speed], 0);
3514 if (TREE_CODE (expr) == ADDR_EXPR)
3516 tree obj = TREE_OPERAND (expr, 0);
3518 if (TREE_CODE (obj) == VAR_DECL
3519 || TREE_CODE (obj) == PARM_DECL
3520 || TREE_CODE (obj) == RESULT_DECL)
3521 return new_cost (symbol_cost [speed], 0);
3524 return new_cost (address_cost [speed], 0);
3527 switch (TREE_CODE (expr))
3529 case POINTER_PLUS_EXPR:
3530 case PLUS_EXPR:
3531 case MINUS_EXPR:
3532 case MULT_EXPR:
3533 op0 = TREE_OPERAND (expr, 0);
3534 op1 = TREE_OPERAND (expr, 1);
3535 STRIP_NOPS (op0);
3536 STRIP_NOPS (op1);
3538 if (is_gimple_val (op0))
3539 cost0 = no_cost;
3540 else
3541 cost0 = force_expr_to_var_cost (op0, speed);
3543 if (is_gimple_val (op1))
3544 cost1 = no_cost;
3545 else
3546 cost1 = force_expr_to_var_cost (op1, speed);
3548 break;
3550 case NEGATE_EXPR:
3551 op0 = TREE_OPERAND (expr, 0);
3552 STRIP_NOPS (op0);
3553 op1 = NULL_TREE;
3555 if (is_gimple_val (op0))
3556 cost0 = no_cost;
3557 else
3558 cost0 = force_expr_to_var_cost (op0, speed);
3560 cost1 = no_cost;
3561 break;
3563 default:
3564 /* Just an arbitrary value, FIXME. */
3565 return new_cost (target_spill_cost[speed], 0);
3568 mode = TYPE_MODE (TREE_TYPE (expr));
3569 switch (TREE_CODE (expr))
3571 case POINTER_PLUS_EXPR:
3572 case PLUS_EXPR:
3573 case MINUS_EXPR:
3574 case NEGATE_EXPR:
3575 cost = new_cost (add_cost (speed, mode), 0);
3576 if (TREE_CODE (expr) != NEGATE_EXPR)
3578 tree mult = NULL_TREE;
3579 comp_cost sa_cost;
3580 if (TREE_CODE (op1) == MULT_EXPR)
3581 mult = op1;
3582 else if (TREE_CODE (op0) == MULT_EXPR)
3583 mult = op0;
3585 if (mult != NULL_TREE
3586 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3587 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3588 speed, &sa_cost))
3589 return sa_cost;
3591 break;
3593 case MULT_EXPR:
3594 if (cst_and_fits_in_hwi (op0))
3595 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3596 mode, speed), 0);
3597 else if (cst_and_fits_in_hwi (op1))
3598 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3599 mode, speed), 0);
3600 else
3601 return new_cost (target_spill_cost [speed], 0);
3602 break;
3604 default:
3605 gcc_unreachable ();
3608 cost = add_costs (cost, cost0);
3609 cost = add_costs (cost, cost1);
3611 /* Bound the cost by target_spill_cost. The parts of complicated
3612 computations often are either loop invariant or at least can
3613 be shared between several iv uses, so letting this grow without
3614 limits would not give reasonable results. */
3615 if (cost.cost > (int) target_spill_cost [speed])
3616 cost.cost = target_spill_cost [speed];
3618 return cost;
3621 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3622 invariants the computation depends on. */
3624 static comp_cost
3625 force_var_cost (struct ivopts_data *data,
3626 tree expr, bitmap *depends_on)
3628 if (depends_on)
3630 fd_ivopts_data = data;
3631 walk_tree (&expr, find_depends, depends_on, NULL);
3634 return force_expr_to_var_cost (expr, data->speed);
3637 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3638 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3639 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3640 invariants the computation depends on. */
3642 static comp_cost
3643 split_address_cost (struct ivopts_data *data,
3644 tree addr, bool *symbol_present, bool *var_present,
3645 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3647 tree core;
3648 HOST_WIDE_INT bitsize;
3649 HOST_WIDE_INT bitpos;
3650 tree toffset;
3651 enum machine_mode mode;
3652 int unsignedp, volatilep;
3654 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3655 &unsignedp, &volatilep, false);
3657 if (toffset != 0
3658 || bitpos % BITS_PER_UNIT != 0
3659 || TREE_CODE (core) != VAR_DECL)
3661 *symbol_present = false;
3662 *var_present = true;
3663 fd_ivopts_data = data;
3664 walk_tree (&addr, find_depends, depends_on, NULL);
3665 return new_cost (target_spill_cost[data->speed], 0);
3668 *offset += bitpos / BITS_PER_UNIT;
3669 if (TREE_STATIC (core)
3670 || DECL_EXTERNAL (core))
3672 *symbol_present = true;
3673 *var_present = false;
3674 return no_cost;
3677 *symbol_present = false;
3678 *var_present = true;
3679 return no_cost;
3682 /* Estimates cost of expressing difference of addresses E1 - E2 as
3683 var + symbol + offset. The value of offset is added to OFFSET,
3684 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3685 part is missing. DEPENDS_ON is a set of the invariants the computation
3686 depends on. */
3688 static comp_cost
3689 ptr_difference_cost (struct ivopts_data *data,
3690 tree e1, tree e2, bool *symbol_present, bool *var_present,
3691 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3693 HOST_WIDE_INT diff = 0;
3694 aff_tree aff_e1, aff_e2;
3695 tree type;
3697 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3699 if (ptr_difference_const (e1, e2, &diff))
3701 *offset += diff;
3702 *symbol_present = false;
3703 *var_present = false;
3704 return no_cost;
3707 if (integer_zerop (e2))
3708 return split_address_cost (data, TREE_OPERAND (e1, 0),
3709 symbol_present, var_present, offset, depends_on);
3711 *symbol_present = false;
3712 *var_present = true;
3714 type = signed_type_for (TREE_TYPE (e1));
3715 tree_to_aff_combination (e1, type, &aff_e1);
3716 tree_to_aff_combination (e2, type, &aff_e2);
3717 aff_combination_scale (&aff_e2, double_int_minus_one);
3718 aff_combination_add (&aff_e1, &aff_e2);
3720 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
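/* For example (illustrative): for E1 == &a[12] and E2 == &a[4] with
   4-byte elements, ptr_difference_const computes the constant 32, so
   only *OFFSET is adjusted and the difference itself costs nothing;
   no symbol or variable part remains.  */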
3723 /* Estimates cost of expressing difference E1 - E2 as
3724 var + symbol + offset. The value of offset is added to OFFSET,
3725 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3726 part is missing. DEPENDS_ON is a set of the invariants the computation
3727 depends on. */
3729 static comp_cost
3730 difference_cost (struct ivopts_data *data,
3731 tree e1, tree e2, bool *symbol_present, bool *var_present,
3732 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3734 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3735 unsigned HOST_WIDE_INT off1, off2;
3736 aff_tree aff_e1, aff_e2;
3737 tree type;
3739 e1 = strip_offset (e1, &off1);
3740 e2 = strip_offset (e2, &off2);
3741 *offset += off1 - off2;
3743 STRIP_NOPS (e1);
3744 STRIP_NOPS (e2);
3746 if (TREE_CODE (e1) == ADDR_EXPR)
3747 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3748 offset, depends_on);
3749 *symbol_present = false;
3751 if (operand_equal_p (e1, e2, 0))
3753 *var_present = false;
3754 return no_cost;
3757 *var_present = true;
3759 if (integer_zerop (e2))
3760 return force_var_cost (data, e1, depends_on);
3762 if (integer_zerop (e1))
3764 comp_cost cost = force_var_cost (data, e2, depends_on);
3765 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3766 return cost;
3769 type = signed_type_for (TREE_TYPE (e1));
3770 tree_to_aff_combination (e1, type, &aff_e1);
3771 tree_to_aff_combination (e2, type, &aff_e2);
3772 aff_combination_scale (&aff_e2, double_int_minus_one);
3773 aff_combination_add (&aff_e1, &aff_e2);
3775 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3778 /* Returns true if AFF1 and AFF2 are identical. */
3780 static bool
3781 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3783 unsigned i;
3785 if (aff1->n != aff2->n)
3786 return false;
3788 for (i = 0; i < aff1->n; i++)
3790 if (aff1->elts[i].coef != aff2->elts[i].coef)
3791 return false;
3793 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3794 return false;
3796 return true;
3799 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3801 static int
3802 get_expr_id (struct ivopts_data *data, tree expr)
3804 struct iv_inv_expr_ent ent;
3805 struct iv_inv_expr_ent **slot;
3807 ent.expr = expr;
3808 ent.hash = iterative_hash_expr (expr, 0);
3809 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3810 &ent, INSERT);
3811 if (*slot)
3812 return (*slot)->id;
3814 *slot = XNEW (struct iv_inv_expr_ent);
3815 (*slot)->expr = expr;
3816 (*slot)->hash = ent.hash;
3817 (*slot)->id = data->inv_expr_id++;
3818 return (*slot)->id;
3821 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3822 requires a new compiler-generated temporary. Returns -1 otherwise.
3823 ADDRESS_P is a flag indicating if the expression is for address
3824 computation. */
3826 static int
3827 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3828 tree cbase, HOST_WIDE_INT ratio,
3829 bool address_p)
3831 aff_tree ubase_aff, cbase_aff;
3832 tree expr, ub, cb;
3834 STRIP_NOPS (ubase);
3835 STRIP_NOPS (cbase);
3836 ub = ubase;
3837 cb = cbase;
3839 if ((TREE_CODE (ubase) == INTEGER_CST)
3840 && (TREE_CODE (cbase) == INTEGER_CST))
3841 return -1;
3843 /* Strips the constant part. */
3844 if (TREE_CODE (ubase) == PLUS_EXPR
3845 || TREE_CODE (ubase) == MINUS_EXPR
3846 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3848 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3849 ubase = TREE_OPERAND (ubase, 0);
3852 /* Strips the constant part. */
3853 if (TREE_CODE (cbase) == PLUS_EXPR
3854 || TREE_CODE (cbase) == MINUS_EXPR
3855 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3857 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3858 cbase = TREE_OPERAND (cbase, 0);
3861 if (address_p)
3863 if (((TREE_CODE (ubase) == SSA_NAME)
3864 || (TREE_CODE (ubase) == ADDR_EXPR
3865 && is_gimple_min_invariant (ubase)))
3866 && (TREE_CODE (cbase) == INTEGER_CST))
3867 return -1;
3869 if (((TREE_CODE (cbase) == SSA_NAME)
3870 || (TREE_CODE (cbase) == ADDR_EXPR
3871 && is_gimple_min_invariant (cbase)))
3872 && (TREE_CODE (ubase) == INTEGER_CST))
3873 return -1;
3876 if (ratio == 1)
3878 if (operand_equal_p (ubase, cbase, 0))
3879 return -1;
3881 if (TREE_CODE (ubase) == ADDR_EXPR
3882 && TREE_CODE (cbase) == ADDR_EXPR)
3884 tree usym, csym;
3886 usym = TREE_OPERAND (ubase, 0);
3887 csym = TREE_OPERAND (cbase, 0);
3888 if (TREE_CODE (usym) == ARRAY_REF)
3890 tree ind = TREE_OPERAND (usym, 1);
3891 if (TREE_CODE (ind) == INTEGER_CST
3892 && host_integerp (ind, 0)
3893 && TREE_INT_CST_LOW (ind) == 0)
3894 usym = TREE_OPERAND (usym, 0);
3896 if (TREE_CODE (csym) == ARRAY_REF)
3898 tree ind = TREE_OPERAND (csym, 1);
3899 if (TREE_CODE (ind) == INTEGER_CST
3900 && host_integerp (ind, 0)
3901 && TREE_INT_CST_LOW (ind) == 0)
3902 csym = TREE_OPERAND (csym, 0);
3904 if (operand_equal_p (usym, csym, 0))
3905 return -1;
3907 /* Now do a more complex comparison. */
3908 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3909 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3910 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3911 return -1;
3914 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3915 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3917 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3918 aff_combination_add (&ubase_aff, &cbase_aff);
3919 expr = aff_combination_to_tree (&ubase_aff);
3920 return get_expr_id (data, expr);
3925 /* Determines the cost of the computation by which USE is expressed
3926 from induction variable CAND. If ADDRESS_P is true, we just need
3927 to create an address from it, otherwise we want to get it into a
3928 register. A set of invariants we depend on is stored in
3929 DEPENDS_ON. AT is the statement at that the value is computed.
3930 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3931 addressing is likely. */
3933 static comp_cost
3934 get_computation_cost_at (struct ivopts_data *data,
3935 struct iv_use *use, struct iv_cand *cand,
3936 bool address_p, bitmap *depends_on, gimple at,
3937 bool *can_autoinc,
3938 int *inv_expr_id)
3940 tree ubase = use->iv->base, ustep = use->iv->step;
3941 tree cbase, cstep;
3942 tree utype = TREE_TYPE (ubase), ctype;
3943 unsigned HOST_WIDE_INT cstepi, offset = 0;
3944 HOST_WIDE_INT ratio, aratio;
3945 bool var_present, symbol_present, stmt_is_after_inc;
3946 comp_cost cost;
3947 double_int rat;
3948 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3949 enum machine_mode mem_mode = (address_p
3950 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3951 : VOIDmode);
3953 *depends_on = NULL;
3955 /* Only consider real candidates. */
3956 if (!cand->iv)
3957 return infinite_cost;
3959 cbase = cand->iv->base;
3960 cstep = cand->iv->step;
3961 ctype = TREE_TYPE (cbase);
3963 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3965 /* We do not have enough precision to express the values of the use. */
3966 return infinite_cost;
3969 if (address_p
3970 || (use->iv->base_object
3971 && cand->iv->base_object
3972 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3973 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3975 /* Do not try to express the address of an object with a computation based
3976 on the address of a different object. This may cause problems in rtl
3977 level alias analysis (which does not expect this to happen, as it is
3978 illegal in C), and would be unlikely to be useful anyway. */
3980 if (use->iv->base_object
3981 && cand->iv->base_object
3982 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3983 return infinite_cost;
3986 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3988 /* TODO -- add direct handling of this case. */
3989 goto fallback;
3992 /* CSTEPI is removed from the offset in case the statement is after the
3993 increment. If the step is not constant, we use zero instead.
3994 This is a bit imprecise (there is the extra addition), but
3995 redundancy elimination is likely to transform the code so that
3996 it uses the value of the variable before the increment anyway,
3997 so this is not all that unrealistic. */
3998 if (cst_and_fits_in_hwi (cstep))
3999 cstepi = int_cst_value (cstep);
4000 else
4001 cstepi = 0;
4003 if (!constant_multiple_of (ustep, cstep, &rat))
4004 return infinite_cost;
4006 if (rat.fits_shwi ())
4007 ratio = rat.to_shwi ();
4008 else
4009 return infinite_cost;
4011 STRIP_NOPS (cbase);
4012 ctype = TREE_TYPE (cbase);
4014 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4016 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4017 or ratio == 1, it is better to handle this like
4019 ubase - ratio * cbase + ratio * var
4021 (this also holds in the case ratio == -1, TODO). */
4023 if (cst_and_fits_in_hwi (cbase))
4025 offset = - ratio * int_cst_value (cbase);
4026 cost = difference_cost (data,
4027 ubase, build_int_cst (utype, 0),
4028 &symbol_present, &var_present, &offset,
4029 depends_on);
4030 cost.cost /= avg_loop_niter (data->current_loop);
4032 else if (ratio == 1)
4034 tree real_cbase = cbase;
4036 /* Check to see if any adjustment is needed. */
4037 if (cstepi == 0 && stmt_is_after_inc)
4039 aff_tree real_cbase_aff;
4040 aff_tree cstep_aff;
4042 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4043 &real_cbase_aff);
4044 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4046 aff_combination_add (&real_cbase_aff, &cstep_aff);
4047 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4050 cost = difference_cost (data,
4051 ubase, real_cbase,
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4056 else if (address_p
4057 && !POINTER_TYPE_P (ctype)
4058 && multiplier_allowed_in_address_p
4059 (ratio, mem_mode,
4060 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4062 cbase
4063 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4064 cost = difference_cost (data,
4065 ubase, cbase,
4066 &symbol_present, &var_present, &offset,
4067 depends_on);
4068 cost.cost /= avg_loop_niter (data->current_loop);
4070 else
4072 cost = force_var_cost (data, cbase, depends_on);
4073 cost = add_costs (cost,
4074 difference_cost (data,
4075 ubase, build_int_cst (utype, 0),
4076 &symbol_present, &var_present,
4077 &offset, depends_on));
4078 cost.cost /= avg_loop_niter (data->current_loop);
4079 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4082 if (inv_expr_id)
4084 *inv_expr_id =
4085 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4086 /* Clear depends_on. */
4087 if (*inv_expr_id != -1 && depends_on && *depends_on)
4088 bitmap_clear (*depends_on);
4091 /* If we are after the increment, the value of the candidate is higher by
4092 one iteration. */
4093 if (stmt_is_after_inc)
4094 offset -= ratio * cstepi;
4096 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4097 (symbol/var1/const parts may be omitted). If we are looking for an
4098 address, find the cost of addressing this. */
4099 if (address_p)
4100 return add_costs (cost,
4101 get_address_cost (symbol_present, var_present,
4102 offset, ratio, cstepi,
4103 mem_mode,
4104 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4105 speed, stmt_is_after_inc,
4106 can_autoinc));
4108 /* Otherwise estimate the costs for computing the expression. */
4109 if (!symbol_present && !var_present && !offset)
4111 if (ratio != 1)
4112 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4113 return cost;
4116 /* Symbol + offset should be compile-time computable so consider that they
4117 are added once to the variable, if present. */
4118 if (var_present && (symbol_present || offset))
4119 cost.cost += adjust_setup_cost (data,
4120 add_cost (speed, TYPE_MODE (ctype)));
4122 /* Having an offset does not affect the runtime cost if it is added to the
4123 symbol, but it increases complexity. */
4124 if (offset)
4125 cost.complexity++;
4127 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4129 aratio = ratio > 0 ? ratio : -ratio;
4130 if (aratio != 1)
4131 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4132 return cost;
4134 fallback:
4135 if (can_autoinc)
4136 *can_autoinc = false;
4139 /* Just get the expression, expand it and measure the cost. */
4140 tree comp = get_computation_at (data->current_loop, use, cand, at);
4142 if (!comp)
4143 return infinite_cost;
4145 if (address_p)
4146 comp = build_simple_mem_ref (comp);
4148 return new_cost (computation_cost (comp, speed), 0);
4152 /* Determines the cost of the computation by which USE is expressed
4153 from induction variable CAND. If ADDRESS_P is true, we just need
4154 to create an address from it, otherwise we want to get it into a
4155 register. A set of invariants we depend on is stored in
4156 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4157 autoinc addressing is likely. */
4159 static comp_cost
4160 get_computation_cost (struct ivopts_data *data,
4161 struct iv_use *use, struct iv_cand *cand,
4162 bool address_p, bitmap *depends_on,
4163 bool *can_autoinc, int *inv_expr_id)
4165 return get_computation_cost_at (data,
4166 use, cand, address_p, depends_on, use->stmt,
4167 can_autoinc, inv_expr_id);
4170 /* Determines cost of basing replacement of USE on CAND in a generic
4171 expression. */
4173 static bool
4174 determine_use_iv_cost_generic (struct ivopts_data *data,
4175 struct iv_use *use, struct iv_cand *cand)
4177 bitmap depends_on;
4178 comp_cost cost;
4179 int inv_expr_id = -1;
4181 /* The simple case first -- if we need to express the value of the preserved
4182 original biv, the cost is 0. This also prevents us from counting the
4183 cost of the increment twice -- once at this use and once in the cost of
4184 the candidate. */
4185 if (cand->pos == IP_ORIGINAL
4186 && cand->incremented_at == use->stmt)
4188 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4189 ERROR_MARK, -1);
4190 return true;
4193 cost = get_computation_cost (data, use, cand, false, &depends_on,
4194 NULL, &inv_expr_id);
4196 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4197 inv_expr_id);
4199 return !infinite_cost_p (cost);
4202 /* Determines cost of basing replacement of USE on CAND in an address. */
4204 static bool
4205 determine_use_iv_cost_address (struct ivopts_data *data,
4206 struct iv_use *use, struct iv_cand *cand)
4208 bitmap depends_on;
4209 bool can_autoinc;
4210 int inv_expr_id = -1;
4211 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4212 &can_autoinc, &inv_expr_id);
4214 if (cand->ainc_use == use)
4216 if (can_autoinc)
4217 cost.cost -= cand->cost_step;
4218 /* If we generated the candidate solely for exploiting autoincrement
4219 opportunities, and it turns out it can't be used, set the cost to
4220 infinity to make sure we ignore it. */
4221 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4222 cost = infinite_cost;
4224 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4225 inv_expr_id);
4227 return !infinite_cost_p (cost);
4230 /* Computes the value of candidate CAND at position AT in iteration NITER
4231 and stores it in VAL. */
4233 static void
4234 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4235 aff_tree *val)
4237 aff_tree step, delta, nit;
4238 struct iv *iv = cand->iv;
4239 tree type = TREE_TYPE (iv->base);
4240 tree steptype = type;
4241 if (POINTER_TYPE_P (type))
4242 steptype = sizetype;
4244 tree_to_aff_combination (iv->step, steptype, &step);
4245 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4246 aff_combination_convert (&nit, steptype);
4247 aff_combination_mult (&nit, &step, &delta);
4248 if (stmt_after_increment (loop, cand, at))
4249 aff_combination_add (&delta, &step);
4251 tree_to_aff_combination (iv->base, type, val);
4252 aff_combination_add (val, &delta);
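/* For illustration: for a candidate with base B and step S, the value
   computed above is B + NITER * S, or B + (NITER + 1) * S when AT follows
   the increment.  E.g. with B == 0, S == 4 and NITER == 10, an exit test
   placed after the increment sees the value 0 + 11 * 4 == 44.  */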
4255 /* Returns the period of induction variable IV. */
4257 static tree
4258 iv_period (struct iv *iv)
4260 tree step = iv->step, period, type;
4261 tree pow2div;
4263 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4265 type = unsigned_type_for (TREE_TYPE (step));
4266 /* The period of the iv is lcm (step, type_range)/step - 1,
4267 i.e., N*type_range/step - 1. Since the type range is a power
4268 of two, N == step >> num_of_ending_zeros_binary (step),
4269 so the final result is
4271 (type_range >> num_of_ending_zeros_binary (step)) - 1
4274 pow2div = num_ending_zeros (step);
4276 period = build_low_bits_mask (type,
4277 (TYPE_PRECISION (type)
4278 - tree_low_cst (pow2div, 1)));
4280 return period;
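/* For example, in a 32-bit unsigned type with step == 12 (binary 1100,
   two trailing zeros), the mask built above has 32 - 2 == 30 low bits
   set, i.e. the period is (2^32 >> 2) - 1 == 0x3fffffff.  */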
4283 /* Returns the comparison operator used when eliminating the iv USE. */
4285 static enum tree_code
4286 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4288 struct loop *loop = data->current_loop;
4289 basic_block ex_bb;
4290 edge exit;
4292 ex_bb = gimple_bb (use->stmt);
4293 exit = EDGE_SUCC (ex_bb, 0);
4294 if (flow_bb_inside_loop_p (loop, exit->dest))
4295 exit = EDGE_SUCC (ex_bb, 1);
4297 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4300 static tree
4301 strip_wrap_conserving_type_conversions (tree exp)
4303 while (tree_ssa_useless_type_conversion (exp)
4304 && (nowrap_type_p (TREE_TYPE (exp))
4305 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4306 exp = TREE_OPERAND (exp, 0);
4307 return exp;
4310 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4311 check for an exact match. */
4313 static bool
4314 expr_equal_p (tree e, tree what)
4316 gimple stmt;
4317 enum tree_code code;
4319 e = strip_wrap_conserving_type_conversions (e);
4320 what = strip_wrap_conserving_type_conversions (what);
4322 code = TREE_CODE (what);
4323 if (TREE_TYPE (e) != TREE_TYPE (what))
4324 return false;
4326 if (operand_equal_p (e, what, 0))
4327 return true;
4329 if (TREE_CODE (e) != SSA_NAME)
4330 return false;
4332 stmt = SSA_NAME_DEF_STMT (e);
4333 if (gimple_code (stmt) != GIMPLE_ASSIGN
4334 || gimple_assign_rhs_code (stmt) != code)
4335 return false;
4337 switch (get_gimple_rhs_class (code))
4339 case GIMPLE_BINARY_RHS:
4340 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4341 return false;
4342 /* Fallthru. */
4344 case GIMPLE_UNARY_RHS:
4345 case GIMPLE_SINGLE_RHS:
4346 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4347 default:
4348 return false;
4352 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4353 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4354 calculation is performed in a non-wrapping type.
4356 TODO: More generally, we could test for the situation that
4357 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4358 This would require knowing the sign of OFFSET.
4360 Also, we only look for the first addition in the computation of BASE.
4361 More complex analysis would be better, but introducing it just for
4362 this optimization seems like overkill. */
4364 static bool
4365 difference_cannot_overflow_p (tree base, tree offset)
4367 enum tree_code code;
4368 tree e1, e2;
4370 if (!nowrap_type_p (TREE_TYPE (base)))
4371 return false;
4373 base = expand_simple_operations (base);
4375 if (TREE_CODE (base) == SSA_NAME)
4377 gimple stmt = SSA_NAME_DEF_STMT (base);
4379 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4380 return false;
4382 code = gimple_assign_rhs_code (stmt);
4383 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4384 return false;
4386 e1 = gimple_assign_rhs1 (stmt);
4387 e2 = gimple_assign_rhs2 (stmt);
4389 else
4391 code = TREE_CODE (base);
4392 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4393 return false;
4394 e1 = TREE_OPERAND (base, 0);
4395 e2 = TREE_OPERAND (base, 1);
4398 /* TODO: deeper inspection may be necessary to prove the equality. */
4399 switch (code)
4401 case PLUS_EXPR:
4402 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4403 case POINTER_PLUS_EXPR:
4404 return expr_equal_p (e2, offset);
4406 default:
4407 return false;
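/* For example, if BASE is an SSA name defined by base = p_0 + off
   (a POINTER_PLUS_EXPR in a non-wrapping type) and OFFSET is off, then
   BASE - OFFSET is just p_0 and trivially cannot overflow; this is
   exactly the PLUS_EXPR/POINTER_PLUS_EXPR pattern matched above.  */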
4411 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4412 comparison with CAND. NITER describes the number of iterations of
4413 the loops. If successful, the comparison in COMP_P is altered accordingly.
4415 We aim to handle the following situation:
4417 sometype *base, *p;
4418 int a, b, i;
4420 i = a;
4421 p = p_0 = base + a;
4423 do
4424 {
4425 bla (*p);
4426 p++;
4427 i++;
4428 }
4429 while (i < b);
4431 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4432 We aim to optimize this to
4434 p = p_0 = base + a;
4435 do
4436 {
4437 bla (*p);
4438 p++;
4439 }
4440 while (p < p_0 - a + b);
4442 This preserves correctness, since the pointer arithmetic does not
4443 overflow. More precisely:
4445 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4446 overflow in computing it or the values of p.
4447 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4448 overflow. To prove this, we use the fact that p_0 = base + a. */
4450 static bool
4451 iv_elimination_compare_lt (struct ivopts_data *data,
4452 struct iv_cand *cand, enum tree_code *comp_p,
4453 struct tree_niter_desc *niter)
4455 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4456 struct affine_tree_combination nit, tmpa, tmpb;
4457 enum tree_code comp;
4458 HOST_WIDE_INT step;
4460 /* We need to know that the candidate induction variable does not overflow.
4461 While more complex analysis may be used to prove this, for now just
4462 check that the variable appears in the original program and that it
4463 is computed in a type that guarantees no overflows. */
4464 cand_type = TREE_TYPE (cand->iv->base);
4465 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4466 return false;
4468 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4469 the calculation of the BOUND could overflow, making the comparison
4470 invalid. */
4471 if (!data->loop_single_exit_p)
4472 return false;
4474 /* We need to be able to decide whether the candidate is increasing or decreasing
4475 in order to choose the right comparison operator. */
4476 if (!cst_and_fits_in_hwi (cand->iv->step))
4477 return false;
4478 step = int_cst_value (cand->iv->step);
4480 /* Check that the number of iterations matches the expected pattern:
4481 a + 1 > b ? 0 : b - a - 1. */
4482 mbz = niter->may_be_zero;
4483 if (TREE_CODE (mbz) == GT_EXPR)
4485 /* Handle a + 1 > b. */
4486 tree op0 = TREE_OPERAND (mbz, 0);
4487 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4489 a = TREE_OPERAND (op0, 0);
4490 b = TREE_OPERAND (mbz, 1);
4492 else
4493 return false;
4495 else if (TREE_CODE (mbz) == LT_EXPR)
4497 tree op1 = TREE_OPERAND (mbz, 1);
4499 /* Handle b < a + 1. */
4500 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4502 a = TREE_OPERAND (op1, 0);
4503 b = TREE_OPERAND (mbz, 0);
4505 else
4506 return false;
4508 else
4509 return false;
4511 /* Expected number of iterations is B - A - 1. Check that it matches
4512 the actual number, i.e., that B - A - NITER = 1. */
4513 tree_to_aff_combination (niter->niter, nit_type, &nit);
4514 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4515 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4516 aff_combination_scale (&nit, double_int_minus_one);
4517 aff_combination_scale (&tmpa, double_int_minus_one);
4518 aff_combination_add (&tmpb, &tmpa);
4519 aff_combination_add (&tmpb, &nit);
4520 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4521 return false;
4523 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4524 overflow. */
4525 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4526 cand->iv->step,
4527 fold_convert (TREE_TYPE (cand->iv->step), a));
4528 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4529 return false;
4531 /* Determine the new comparison operator. */
4532 comp = step < 0 ? GT_EXPR : LT_EXPR;
4533 if (*comp_p == NE_EXPR)
4534 *comp_p = comp;
4535 else if (*comp_p == EQ_EXPR)
4536 *comp_p = invert_tree_comparison (comp, false);
4537 else
4538 gcc_unreachable ();
4540 return true;
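/* A worked instance of the above: for a == 2 and b == 7, MAY_BE_ZERO is
   2 + 1 > 7 and NITER is b - a - 1 == 4, so B - A - NITER
   == 7 - 2 - 4 == 1 as required; an NE_EXPR exit test on an increasing
   candidate is then rewritten to use LT_EXPR.  */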
4543 /* Check whether it is possible to express the condition in USE by comparison
4544 of candidate CAND. If so, store the value compared with to BOUND, and the
4545 comparison operator to COMP. */
4547 static bool
4548 may_eliminate_iv (struct ivopts_data *data,
4549 struct iv_use *use, struct iv_cand *cand, tree *bound,
4550 enum tree_code *comp)
4552 basic_block ex_bb;
4553 edge exit;
4554 tree period;
4555 struct loop *loop = data->current_loop;
4556 aff_tree bnd;
4557 struct tree_niter_desc *desc = NULL;
4559 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4560 return false;
4562 /* For now this works only for exits that dominate the loop latch.
4563 TODO: extend to other conditions inside loop body. */
4564 ex_bb = gimple_bb (use->stmt);
4565 if (use->stmt != last_stmt (ex_bb)
4566 || gimple_code (use->stmt) != GIMPLE_COND
4567 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4568 return false;
4570 exit = EDGE_SUCC (ex_bb, 0);
4571 if (flow_bb_inside_loop_p (loop, exit->dest))
4572 exit = EDGE_SUCC (ex_bb, 1);
4573 if (flow_bb_inside_loop_p (loop, exit->dest))
4574 return false;
4576 desc = niter_for_exit (data, exit);
4577 if (!desc)
4578 return false;
4580 /* Determine whether we can use the variable to test the exit condition.
4581 This is the case iff the period of the induction variable is greater
4582 than the number of iterations for which the exit condition is true. */
4583 period = iv_period (cand->iv);
4585 /* If the number of iterations is constant, compare against it directly. */
4586 if (TREE_CODE (desc->niter) == INTEGER_CST)
4588 /* See cand_value_at. */
4589 if (stmt_after_increment (loop, cand, use->stmt))
4591 if (!tree_int_cst_lt (desc->niter, period))
4592 return false;
4594 else
4596 if (tree_int_cst_lt (period, desc->niter))
4597 return false;
4601 /* If not, and if this is the only possible exit of the loop, see whether
4602 we can get a conservative estimate on the number of iterations of the
4603 entire loop and compare against that instead. */
4604 else
4606 double_int period_value, max_niter;
4608 max_niter = desc->max;
4609 if (stmt_after_increment (loop, cand, use->stmt))
4610 max_niter += double_int_one;
4611 period_value = tree_to_double_int (period);
4612 if (max_niter.ugt (period_value))
4614 /* See if we can take advantage of inferred loop bound information. */
4615 if (data->loop_single_exit_p)
4617 if (!max_loop_iterations (loop, &max_niter))
4618 return false;
4619 /* The loop bound is already adjusted by adding 1. */
4620 if (max_niter.ugt (period_value))
4621 return false;
4623 else
4624 return false;
4628 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4630 *bound = aff_combination_to_tree (&bnd);
4631 *comp = iv_elimination_compare (data, use);
4633 /* It is unlikely that computing the number of iterations using division
4634 would be more profitable than keeping the original induction variable. */
4635 if (expression_expensive_p (*bound))
4636 return false;
4638 /* Sometimes it is possible to handle the situation that the number of
4639 iterations may be zero unless additional assumptions hold, by using <
4640 instead of != in the exit condition.
4642 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4643 base the exit condition on it. However, that is often too
4644 expensive. */
4645 if (!integer_zerop (desc->may_be_zero))
4646 return iv_elimination_compare_lt (data, cand, comp, desc);
4648 return true;
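/* For instance, an 8-bit candidate with step 1 has period 255, so it can
   only be used for an exit taken within at most 255 iterations (254 when
   the test follows the increment); with a larger bound the candidate
   would wrap around before the exit value is reached.  */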
4651 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4652 be copied, if it is used in the loop body and DATA->body_includes_call. */
4654 static int
4655 parm_decl_cost (struct ivopts_data *data, tree bound)
4657 tree sbound = bound;
4658 STRIP_NOPS (sbound);
4660 if (TREE_CODE (sbound) == SSA_NAME
4661 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4662 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4663 && data->body_includes_call)
4664 return COSTS_N_INSNS (1);
4666 return 0;
4669 /* Determines cost of basing replacement of USE on CAND in a condition. */
4671 static bool
4672 determine_use_iv_cost_condition (struct ivopts_data *data,
4673 struct iv_use *use, struct iv_cand *cand)
4675 tree bound = NULL_TREE;
4676 struct iv *cmp_iv;
4677 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4678 comp_cost elim_cost, express_cost, cost, bound_cost;
4679 bool ok;
4680 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4681 tree *control_var, *bound_cst;
4682 enum tree_code comp = ERROR_MARK;
4684 /* Only consider real candidates. */
4685 if (!cand->iv)
4687 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4688 ERROR_MARK, -1);
4689 return false;
4692 /* Try iv elimination. */
4693 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4695 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4696 if (elim_cost.cost == 0)
4697 elim_cost.cost = parm_decl_cost (data, bound);
4698 else if (TREE_CODE (bound) == INTEGER_CST)
4699 elim_cost.cost = 0;
4700 /* If we replace a loop condition 'i < n' with 'p < base + n',
4701 depends_on_elim will have 'base' and 'n' set, which implies
4702 that both 'base' and 'n' will be live during the loop. More likely,
4703 'base + n' will be loop invariant, resulting in only one live value
4704 during the loop. So in that case we clear depends_on_elim and set
4705 elim_inv_expr_id instead. */
4706 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4708 elim_inv_expr_id = get_expr_id (data, bound);
4709 bitmap_clear (depends_on_elim);
4711 /* The bound is a loop invariant, so it will be computed only
4712 once. */
4713 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4715 else
4716 elim_cost = infinite_cost;
4718 /* Try expressing the original giv. If it is compared with an invariant,
4719 note that we cannot get rid of it. */
4720 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4721 NULL, &cmp_iv);
4722 gcc_assert (ok);
4724 /* When the condition is a comparison of the candidate IV against
4725 zero, prefer this IV.
4727 TODO: The constant that we're subtracting from the cost should
4728 be target-dependent. This information should be added to the
4729 target costs for each backend. */
4730 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4731 && integer_zerop (*bound_cst)
4732 && (operand_equal_p (*control_var, cand->var_after, 0)
4733 || operand_equal_p (*control_var, cand->var_before, 0)))
4734 elim_cost.cost -= 1;
4736 express_cost = get_computation_cost (data, use, cand, false,
4737 &depends_on_express, NULL,
4738 &express_inv_expr_id);
4739 fd_ivopts_data = data;
4740 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4742 /* Count the cost of the original bound as well. */
4743 bound_cost = force_var_cost (data, *bound_cst, NULL);
4744 if (bound_cost.cost == 0)
4745 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4746 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4747 bound_cost.cost = 0;
4748 express_cost.cost += bound_cost.cost;
4750 /* Choose the better approach, preferring the eliminated IV. */
4751 if (compare_costs (elim_cost, express_cost) <= 0)
4753 cost = elim_cost;
4754 depends_on = depends_on_elim;
4755 depends_on_elim = NULL;
4756 inv_expr_id = elim_inv_expr_id;
4758 else
4760 cost = express_cost;
4761 depends_on = depends_on_express;
4762 depends_on_express = NULL;
4763 bound = NULL_TREE;
4764 comp = ERROR_MARK;
4765 inv_expr_id = express_inv_expr_id;
4768 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4770 if (depends_on_elim)
4771 BITMAP_FREE (depends_on_elim);
4772 if (depends_on_express)
4773 BITMAP_FREE (depends_on_express);
4775 return !infinite_cost_p (cost);
4778 /* Determines cost of basing replacement of USE on CAND. Returns false
4779 if USE cannot be based on CAND. */
4781 static bool
4782 determine_use_iv_cost (struct ivopts_data *data,
4783 struct iv_use *use, struct iv_cand *cand)
4785 switch (use->type)
4787 case USE_NONLINEAR_EXPR:
4788 return determine_use_iv_cost_generic (data, use, cand);
4790 case USE_ADDRESS:
4791 return determine_use_iv_cost_address (data, use, cand);
4793 case USE_COMPARE:
4794 return determine_use_iv_cost_condition (data, use, cand);
4796 default:
4797 gcc_unreachable ();
4801 /* Return true if get_computation_cost indicates that autoincrement is
4802 a possibility for the pair of USE and CAND, false otherwise. */
4804 static bool
4805 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4806 struct iv_cand *cand)
4808 bitmap depends_on;
4809 bool can_autoinc;
4810 comp_cost cost;
4812 if (use->type != USE_ADDRESS)
4813 return false;
4815 cost = get_computation_cost (data, use, cand, true, &depends_on,
4816 &can_autoinc, NULL);
4818 BITMAP_FREE (depends_on);
4820 return !infinite_cost_p (cost) && can_autoinc;
4823 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4824 use that allows autoincrement, and set their AINC_USE if possible. */
4826 static void
4827 set_autoinc_for_original_candidates (struct ivopts_data *data)
4829 unsigned i, j;
4831 for (i = 0; i < n_iv_cands (data); i++)
4833 struct iv_cand *cand = iv_cand (data, i);
4834 struct iv_use *closest = NULL;
4835 if (cand->pos != IP_ORIGINAL)
4836 continue;
4837 for (j = 0; j < n_iv_uses (data); j++)
4839 struct iv_use *use = iv_use (data, j);
4840 unsigned uid = gimple_uid (use->stmt);
4841 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4842 || uid > gimple_uid (cand->incremented_at))
4843 continue;
4844 if (closest == NULL || uid > gimple_uid (closest->stmt))
4845 closest = use;
4847 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4848 continue;
4849 cand->ainc_use = closest;
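/* E.g. on a target with post-increment addressing, an address use *p
   followed in the same block by the original biv increment p = p + 1
   becomes the candidate's AINC_USE here, and determine_use_iv_cost_address
   then rewards the pair by subtracting the step cost.  */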
4853 /* Finds the candidates for the induction variables. */
4855 static void
4856 find_iv_candidates (struct ivopts_data *data)
4858 /* Add commonly used ivs. */
4859 add_standard_iv_candidates (data);
4861 /* Add old induction variables. */
4862 add_old_ivs_candidates (data);
4864 /* Add induction variables derived from uses. */
4865 add_derived_ivs_candidates (data);
4867 set_autoinc_for_original_candidates (data);
4869 /* Record the important candidates. */
4870 record_important_candidates (data);
4873 /* Determines costs of basing the use of the iv on an iv candidate. */
4875 static void
4876 determine_use_iv_costs (struct ivopts_data *data)
4878 unsigned i, j;
4879 struct iv_use *use;
4880 struct iv_cand *cand;
4881 bitmap to_clear = BITMAP_ALLOC (NULL);
4883 alloc_use_cost_map (data);
4885 for (i = 0; i < n_iv_uses (data); i++)
4887 use = iv_use (data, i);
4889 if (data->consider_all_candidates)
4891 for (j = 0; j < n_iv_cands (data); j++)
4893 cand = iv_cand (data, j);
4894 determine_use_iv_cost (data, use, cand);
4897 else
4899 bitmap_iterator bi;
4901 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4903 cand = iv_cand (data, j);
4904 if (!determine_use_iv_cost (data, use, cand))
4905 bitmap_set_bit (to_clear, j);
4908 /* Remove the candidates for which the cost is infinite from
4909 the list of related candidates. */
4910 bitmap_and_compl_into (use->related_cands, to_clear);
4911 bitmap_clear (to_clear);
4915 BITMAP_FREE (to_clear);
4917 if (dump_file && (dump_flags & TDF_DETAILS))
4919 fprintf (dump_file, "Use-candidate costs:\n");
4921 for (i = 0; i < n_iv_uses (data); i++)
4923 use = iv_use (data, i);
4925 fprintf (dump_file, "Use %d:\n", i);
4926 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4927 for (j = 0; j < use->n_map_members; j++)
4929 if (!use->cost_map[j].cand
4930 || infinite_cost_p (use->cost_map[j].cost))
4931 continue;
4933 fprintf (dump_file, " %d\t%d\t%d\t",
4934 use->cost_map[j].cand->id,
4935 use->cost_map[j].cost.cost,
4936 use->cost_map[j].cost.complexity);
4937 if (use->cost_map[j].depends_on)
4938 bitmap_print (dump_file,
4939 use->cost_map[j].depends_on, "","");
4940 if (use->cost_map[j].inv_expr_id != -1)
4941 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4942 fprintf (dump_file, "\n");
4945 fprintf (dump_file, "\n");
4947 fprintf (dump_file, "\n");
4951 /* Determines cost of the candidate CAND. */
4953 static void
4954 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4956 comp_cost cost_base;
4957 unsigned cost, cost_step;
4958 tree base;
4960 if (!cand->iv)
4962 cand->cost = 0;
4963 return;
4966 /* There are two costs associated with the candidate -- its increment
4967 and its initialization. The second is almost negligible for any loop
4968 that rolls enough, so we give it only very little weight. */
4970 base = cand->iv->base;
4971 cost_base = force_var_cost (data, base, NULL);
4972 /* It is exceptional that the iv register happens to be initialized with
4973 the proper value at no cost. In general, there will at least be a regcopy
4974 or a const set. */
4975 if (cost_base.cost == 0)
4976 cost_base.cost = COSTS_N_INSNS (1);
4977 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4979 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4981 /* Prefer the original ivs unless we may gain something by replacing them.
4982 The reason is to make debugging simpler; so this is not relevant for
4983 artificial ivs created by other optimization passes. */
4984 if (cand->pos != IP_ORIGINAL
4985 || !SSA_NAME_VAR (cand->var_before)
4986 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4987 cost++;
4989 /* Prefer not to insert statements into latch unless there are some
4990 already (so that we do not create unnecessary jumps). */
4991 if (cand->pos == IP_END
4992 && empty_block_p (ip_end_pos (data->current_loop)))
4993 cost++;
4995 cand->cost = cost;
4996 cand->cost_step = cost_step;
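/* A rough example: if an addition costs 1 and computing the base costs 4,
   the candidate cost is 1 plus the setup cost as scaled by
   adjust_setup_cost (which, when optimizing for speed, spreads it over
   the average number of iterations), plus the preference bumps above.  */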
4999 /* Determines costs of computation of the candidates. */
5001 static void
5002 determine_iv_costs (struct ivopts_data *data)
5004 unsigned i;
5006 if (dump_file && (dump_flags & TDF_DETAILS))
5008 fprintf (dump_file, "Candidate costs:\n");
5009 fprintf (dump_file, " cand\tcost\n");
5012 for (i = 0; i < n_iv_cands (data); i++)
5014 struct iv_cand *cand = iv_cand (data, i);
5016 determine_iv_cost (data, cand);
5018 if (dump_file && (dump_flags & TDF_DETAILS))
5019 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5022 if (dump_file && (dump_flags & TDF_DETAILS))
5023 fprintf (dump_file, "\n");
5026 /* Calculates cost for having SIZE induction variables. */
5028 static unsigned
5029 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5031 /* We add size to the cost, so that we prefer eliminating ivs
5032 if possible. */
5033 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5034 data->body_includes_call);
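/* E.g. a set of 4 ivs is charged at least 4 here; and once SIZE plus
   DATA->regs_used approaches the target's available registers,
   estimate_reg_pressure_cost can be expected to start charging for
   spills, so smaller candidate sets win in register-starved loops.  */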
5037 /* For each size of the induction variable set determine the penalty. */
5039 static void
5040 determine_set_costs (struct ivopts_data *data)
5042 unsigned j, n;
5043 gimple phi;
5044 gimple_stmt_iterator psi;
5045 tree op;
5046 struct loop *loop = data->current_loop;
5047 bitmap_iterator bi;
5049 if (dump_file && (dump_flags & TDF_DETAILS))
5051 fprintf (dump_file, "Global costs:\n");
5052 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5053 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5054 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5055 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5058 n = 0;
5059 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5061 phi = gsi_stmt (psi);
5062 op = PHI_RESULT (phi);
5064 if (virtual_operand_p (op))
5065 continue;
5067 if (get_iv (data, op))
5068 continue;
5070 n++;
5073 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5075 struct version_info *info = ver_info (data, j);
5077 if (info->inv_id && info->has_nonlin_use)
5078 n++;
5081 data->regs_used = n;
5082 if (dump_file && (dump_flags & TDF_DETAILS))
5083 fprintf (dump_file, " regs_used %d\n", n);
5085 if (dump_file && (dump_flags & TDF_DETAILS))
5087 fprintf (dump_file, " cost for size:\n");
5088 fprintf (dump_file, " ivs\tcost\n");
5089 for (j = 0; j <= 2 * target_avail_regs; j++)
5090 fprintf (dump_file, " %d\t%d\n", j,
5091 ivopts_global_cost_for_size (data, j));
5092 fprintf (dump_file, "\n");
5096 /* Returns true if A is a cheaper cost pair than B. */
5098 static bool
5099 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5101 int cmp;
5103 if (!a)
5104 return false;
5106 if (!b)
5107 return true;
5109 cmp = compare_costs (a->cost, b->cost);
5110 if (cmp < 0)
5111 return true;
5113 if (cmp > 0)
5114 return false;
5116 /* In case the costs are the same, prefer the cheaper candidate. */
5117 if (a->cand->cost < b->cand->cost)
5118 return true;
5120 return false;
5124 /* Returns the candidate by which USE is expressed in IVS. */
5126 static struct cost_pair *
5127 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5129 return ivs->cand_for_use[use->id];
5132 /* Computes the cost field of the IVS structure. */
5134 static void
5135 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5137 comp_cost cost = ivs->cand_use_cost;
5139 cost.cost += ivs->cand_cost;
5141 cost.cost += ivopts_global_cost_for_size (data,
5142 ivs->n_regs + ivs->num_used_inv_expr);
5144 ivs->cost = cost;
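/* The total is thus

     cand_use_cost + cand_cost
       + ivopts_global_cost_for_size (n_regs + num_used_inv_expr)

   i.e. each register needed by a candidate or by a live invariant
   expression feeds into the register-pressure term.  */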
5147 /* Remove the invariants in set INVS from set IVS. */
5149 static void
5150 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5152 bitmap_iterator bi;
5153 unsigned iid;
5155 if (!invs)
5156 return;
5158 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5160 ivs->n_invariant_uses[iid]--;
5161 if (ivs->n_invariant_uses[iid] == 0)
5162 ivs->n_regs--;
5166 /* Set USE not to be expressed by any candidate in IVS. */
5168 static void
5169 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5170 struct iv_use *use)
5172 unsigned uid = use->id, cid;
5173 struct cost_pair *cp;
5175 cp = ivs->cand_for_use[uid];
5176 if (!cp)
5177 return;
5178 cid = cp->cand->id;
5180 ivs->bad_uses++;
5181 ivs->cand_for_use[uid] = NULL;
5182 ivs->n_cand_uses[cid]--;
5184 if (ivs->n_cand_uses[cid] == 0)
5186 bitmap_clear_bit (ivs->cands, cid);
5187 /* Do not count the pseudocandidates. */
5188 if (cp->cand->iv)
5189 ivs->n_regs--;
5190 ivs->n_cands--;
5191 ivs->cand_cost -= cp->cand->cost;
5193 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5196 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5198 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5200 if (cp->inv_expr_id != -1)
5202 ivs->used_inv_expr[cp->inv_expr_id]--;
5203 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5204 ivs->num_used_inv_expr--;
5206 iv_ca_recount_cost (data, ivs);
5209 /* Add invariants in set INVS to set IVS. */
5211 static void
5212 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5214 bitmap_iterator bi;
5215 unsigned iid;
5217 if (!invs)
5218 return;
5220 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5222 ivs->n_invariant_uses[iid]++;
5223 if (ivs->n_invariant_uses[iid] == 1)
5224 ivs->n_regs++;
5228 /* Set cost pair for USE in set IVS to CP. */
5230 static void
5231 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5232 struct iv_use *use, struct cost_pair *cp)
5234 unsigned uid = use->id, cid;
5236 if (ivs->cand_for_use[uid] == cp)
5237 return;
5239 if (ivs->cand_for_use[uid])
5240 iv_ca_set_no_cp (data, ivs, use);
5242 if (cp)
5244 cid = cp->cand->id;
5246 ivs->bad_uses--;
5247 ivs->cand_for_use[uid] = cp;
5248 ivs->n_cand_uses[cid]++;
5249 if (ivs->n_cand_uses[cid] == 1)
5251 bitmap_set_bit (ivs->cands, cid);
5252 /* Do not count the pseudocandidates. */
5253 if (cp->cand->iv)
5254 ivs->n_regs++;
5255 ivs->n_cands++;
5256 ivs->cand_cost += cp->cand->cost;
5258 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5261 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5262 iv_ca_set_add_invariants (ivs, cp->depends_on);
5264 if (cp->inv_expr_id != -1)
5266 ivs->used_inv_expr[cp->inv_expr_id]++;
5267 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5268 ivs->num_used_inv_expr++;
5270 iv_ca_recount_cost (data, ivs);
5274 /* Extend set IVS by expressing USE by some of the candidates in it
5275 if possible. All important candidates will be considered
5276 if IMPORTANT_CANDIDATES is true. */
5278 static void
5279 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5280 struct iv_use *use, bool important_candidates)
5282 struct cost_pair *best_cp = NULL, *cp;
5283 bitmap_iterator bi;
5284 bitmap cands;
5285 unsigned i;
5287 gcc_assert (ivs->upto >= use->id);
5289 if (ivs->upto == use->id)
5291 ivs->upto++;
5292 ivs->bad_uses++;
5295 cands = (important_candidates ? data->important_candidates : ivs->cands);
5296 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5298 struct iv_cand *cand = iv_cand (data, i);
5300 cp = get_use_iv_cost (data, use, cand);
5302 if (cheaper_cost_pair (cp, best_cp))
5303 best_cp = cp;
5306 iv_ca_set_cp (data, ivs, use, best_cp);
5309 /* Gets the cost of the candidate assignment IVS. */
5311 static comp_cost
5312 iv_ca_cost (struct iv_ca *ivs)
5314 /* This was a conditional expression but it triggered a bug in
5315 Sun C 5.5. */
5316 if (ivs->bad_uses)
5317 return infinite_cost;
5318 else
5319 return ivs->cost;
5322 /* Returns true if all dependences of CP are among invariants in IVS. */
5324 static bool
5325 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5327 unsigned i;
5328 bitmap_iterator bi;
5330 if (!cp->depends_on)
5331 return true;
5333 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5335 if (ivs->n_invariant_uses[i] == 0)
5336 return false;
5339 return true;
5342 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5343 it before NEXT_CHANGE. */
5345 static struct iv_ca_delta *
5346 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5347 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5349 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5351 change->use = use;
5352 change->old_cp = old_cp;
5353 change->new_cp = new_cp;
5354 change->next_change = next_change;
5356 return change;
5359 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5360 are rewritten. */
5362 static struct iv_ca_delta *
5363 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5365 struct iv_ca_delta *last;
5367 if (!l2)
5368 return l1;
5370 if (!l1)
5371 return l2;
5373 for (last = l1; last->next_change; last = last->next_change)
5374 continue;
5375 last->next_change = l2;
5377 return l1;
5380 /* Reverse the list of changes DELTA, forming the inverse to it. */
5382 static struct iv_ca_delta *
5383 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5385 struct iv_ca_delta *act, *next, *prev = NULL;
5386 struct cost_pair *tmp;
5388 for (act = delta; act; act = next)
5390 next = act->next_change;
5391 act->next_change = prev;
5392 prev = act;
5394 tmp = act->old_cp;
5395 act->old_cp = act->new_cp;
5396 act->new_cp = tmp;
5399 return prev;
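/* Note that besides reversing the chain, each OLD_CP/NEW_CP pair is
   swapped, so committing the reversed delta exactly undoes committing
   the original one.  */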
5402 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5403 reverted instead. */
5405 static void
5406 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5407 struct iv_ca_delta *delta, bool forward)
5409 struct cost_pair *from, *to;
5410 struct iv_ca_delta *act;
5412 if (!forward)
5413 delta = iv_ca_delta_reverse (delta);
5415 for (act = delta; act; act = act->next_change)
5417 from = act->old_cp;
5418 to = act->new_cp;
5419 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5420 iv_ca_set_cp (data, ivs, act->use, to);
5423 if (!forward)
5424 iv_ca_delta_reverse (delta);
5427 /* Returns true if CAND is used in IVS. */
5429 static bool
5430 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5432 return ivs->n_cand_uses[cand->id] > 0;
5435 /* Returns the number of induction variable candidates in the set IVS. */
5437 static unsigned
5438 iv_ca_n_cands (struct iv_ca *ivs)
5440 return ivs->n_cands;
5443 /* Free the list of changes DELTA. */
5445 static void
5446 iv_ca_delta_free (struct iv_ca_delta **delta)
5448 struct iv_ca_delta *act, *next;
5450 for (act = *delta; act; act = next)
5452 next = act->next_change;
5453 free (act);
5456 *delta = NULL;
5459 /* Allocates a new iv candidate assignment. */
5461 static struct iv_ca *
5462 iv_ca_new (struct ivopts_data *data)
5464 struct iv_ca *nw = XNEW (struct iv_ca);
5466 nw->upto = 0;
5467 nw->bad_uses = 0;
5468 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5469 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5470 nw->cands = BITMAP_ALLOC (NULL);
5471 nw->n_cands = 0;
5472 nw->n_regs = 0;
5473 nw->cand_use_cost = no_cost;
5474 nw->cand_cost = 0;
5475 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5476 nw->cost = no_cost;
5477 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5478 nw->num_used_inv_expr = 0;
5480 return nw;
5483 /* Free memory occupied by the set IVS. */
5485 static void
5486 iv_ca_free (struct iv_ca **ivs)
5488 free ((*ivs)->cand_for_use);
5489 free ((*ivs)->n_cand_uses);
5490 BITMAP_FREE ((*ivs)->cands);
5491 free ((*ivs)->n_invariant_uses);
5492 free ((*ivs)->used_inv_expr);
5493 free (*ivs);
5494 *ivs = NULL;
5497 /* Dumps IVS to FILE. */
5499 static void
5500 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5502 const char *pref = " invariants ";
5503 unsigned i;
5504 comp_cost cost = iv_ca_cost (ivs);
5506 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5507 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5508 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5509 bitmap_print (file, ivs->cands, " candidates: ","\n");
5511 for (i = 0; i < ivs->upto; i++)
5513 struct iv_use *use = iv_use (data, i);
5514 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5515 if (cp)
5516 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5517 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5518 else
5519 fprintf (file, " use:%d --> ??\n", use->id);
5522 for (i = 1; i <= data->max_inv_id; i++)
5523 if (ivs->n_invariant_uses[i])
5525 fprintf (file, "%s%d", pref, i);
5526 pref = ", ";
5528 fprintf (file, "\n\n");
5531 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5532 new set, and store differences in DELTA. Number of induction variables
5533 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true,
5534 the function will try to find a solution with a minimal number of iv candidates. */
5536 static comp_cost
5537 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5538 struct iv_cand *cand, struct iv_ca_delta **delta,
5539 unsigned *n_ivs, bool min_ncand)
5541 unsigned i;
5542 comp_cost cost;
5543 struct iv_use *use;
5544 struct cost_pair *old_cp, *new_cp;
5546 *delta = NULL;
5547 for (i = 0; i < ivs->upto; i++)
5549 use = iv_use (data, i);
5550 old_cp = iv_ca_cand_for_use (ivs, use);
5552 if (old_cp
5553 && old_cp->cand == cand)
5554 continue;
5556 new_cp = get_use_iv_cost (data, use, cand);
5557 if (!new_cp)
5558 continue;
5560 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5561 continue;
5563 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5564 continue;
5566 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5569 iv_ca_delta_commit (data, ivs, *delta, true);
5570 cost = iv_ca_cost (ivs);
5571 if (n_ivs)
5572 *n_ivs = iv_ca_n_cands (ivs);
5573 iv_ca_delta_commit (data, ivs, *delta, false);
5575 return cost;
5578 /* Try narrowing set IVS by removing CAND. Return the cost of
5579 the new set and store the differences in DELTA. */
5581 static comp_cost
5582 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5583 struct iv_cand *cand, struct iv_ca_delta **delta)
5585 unsigned i, ci;
5586 struct iv_use *use;
5587 struct cost_pair *old_cp, *new_cp, *cp;
5588 bitmap_iterator bi;
5589 struct iv_cand *cnd;
5590 comp_cost cost;
5592 *delta = NULL;
5593 for (i = 0; i < n_iv_uses (data); i++)
5595 use = iv_use (data, i);
5597 old_cp = iv_ca_cand_for_use (ivs, use);
5598 if (old_cp->cand != cand)
5599 continue;
5601 new_cp = NULL;
5603 if (data->consider_all_candidates)
5605 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5607 if (ci == cand->id)
5608 continue;
5610 cnd = iv_cand (data, ci);
5612 cp = get_use_iv_cost (data, use, cnd);
5613 if (!cp)
5614 continue;
5616 if (!iv_ca_has_deps (ivs, cp))
5617 continue;
5619 if (!cheaper_cost_pair (cp, new_cp))
5620 continue;
5622 new_cp = cp;
5625 else
5627 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5629 if (ci == cand->id)
5630 continue;
5632 cnd = iv_cand (data, ci);
5634 cp = get_use_iv_cost (data, use, cnd);
5635 if (!cp)
5636 continue;
5637 if (!iv_ca_has_deps (ivs, cp))
5638 continue;
5640 if (!cheaper_cost_pair (cp, new_cp))
5641 continue;
5643 new_cp = cp;
5647 if (!new_cp)
5649 iv_ca_delta_free (delta);
5650 return infinite_cost;
5653 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5656 iv_ca_delta_commit (data, ivs, *delta, true);
5657 cost = iv_ca_cost (ivs);
5658 iv_ca_delta_commit (data, ivs, *delta, false);
5660 return cost;
5663 /* Try optimizing the set of candidates IVS by removing candidates different
5664 from EXCEPT_CAND from it. Return the cost of the new set, and store
5665 differences in DELTA. */
5667 static comp_cost
5668 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5669 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5671 bitmap_iterator bi;
5672 struct iv_ca_delta *act_delta, *best_delta;
5673 unsigned i;
5674 comp_cost best_cost, acost;
5675 struct iv_cand *cand;
5677 best_delta = NULL;
5678 best_cost = iv_ca_cost (ivs);
5680 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5682 cand = iv_cand (data, i);
5684 if (cand == except_cand)
5685 continue;
5687 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5689 if (compare_costs (acost, best_cost) < 0)
5691 best_cost = acost;
5692 iv_ca_delta_free (&best_delta);
5693 best_delta = act_delta;
5695 else
5696 iv_ca_delta_free (&act_delta);
5699 if (!best_delta)
5701 *delta = NULL;
5702 return best_cost;
5705 /* Recurse to possibly remove other unnecessary ivs. */
5706 iv_ca_delta_commit (data, ivs, best_delta, true);
5707 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5708 iv_ca_delta_commit (data, ivs, best_delta, false);
5709 *delta = iv_ca_delta_join (best_delta, *delta);
5710 return best_cost;
5713 /* Tries to extend the set IVS in the best possible way in order
5714 to express USE. If ORIGINALP is true, prefer candidates from
5715 the original set of IVs, otherwise favor important candidates not
5716 based on any memory object. */
5718 static bool
5719 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5720 struct iv_use *use, bool originalp)
5722 comp_cost best_cost, act_cost;
5723 unsigned i;
5724 bitmap_iterator bi;
5725 struct iv_cand *cand;
5726 struct iv_ca_delta *best_delta = NULL, *act_delta;
5727 struct cost_pair *cp;
5729 iv_ca_add_use (data, ivs, use, false);
5730 best_cost = iv_ca_cost (ivs);
5732 cp = iv_ca_cand_for_use (ivs, use);
5733 if (!cp)
5735 ivs->upto--;
5736 ivs->bad_uses--;
5737 iv_ca_add_use (data, ivs, use, true);
5738 best_cost = iv_ca_cost (ivs);
5739 cp = iv_ca_cand_for_use (ivs, use);
5741 if (cp)
5743 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5744 iv_ca_set_no_cp (data, ivs, use);
5747 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5748 first try important candidates not based on any memory object. Only if
5749 this fails, try the specific ones. Rationale -- in loops with many
5750 variables the best choice often is to use just one generic biv. If we
5751 added here many ivs specific to the uses, the optimization algorithm later
5752 would be likely to get stuck in a local minimum, thus causing us to create
5753 too many ivs. The approach from few ivs to more seems more likely to be
5754 successful -- starting from few ivs, replacing an expensive use by a
5755 specific iv should always be a win. */
5756 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5758 cand = iv_cand (data, i);
5760 if (originalp && cand->pos != IP_ORIGINAL)
5761 continue;
5763 if (!originalp && cand->iv->base_object != NULL_TREE)
5764 continue;
5766 if (iv_ca_cand_used_p (ivs, cand))
5767 continue;
5769 cp = get_use_iv_cost (data, use, cand);
5770 if (!cp)
5771 continue;
5773 iv_ca_set_cp (data, ivs, use, cp);
5774 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5775 true);
5776 iv_ca_set_no_cp (data, ivs, use);
5777 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5779 if (compare_costs (act_cost, best_cost) < 0)
5781 best_cost = act_cost;
5783 iv_ca_delta_free (&best_delta);
5784 best_delta = act_delta;
5786 else
5787 iv_ca_delta_free (&act_delta);
5790 if (infinite_cost_p (best_cost))
5792 for (i = 0; i < use->n_map_members; i++)
5794 cp = use->cost_map + i;
5795 cand = cp->cand;
5796 if (!cand)
5797 continue;
5799 /* Already tried this. */
5800 if (cand->important)
5802 if (originalp && cand->pos == IP_ORIGINAL)
5803 continue;
5804 if (!originalp && cand->iv->base_object == NULL_TREE)
5805 continue;
5808 if (iv_ca_cand_used_p (ivs, cand))
5809 continue;
5811 act_delta = NULL;
5812 iv_ca_set_cp (data, ivs, use, cp);
5813 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5814 iv_ca_set_no_cp (data, ivs, use);
5815 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5816 cp, act_delta);
5818 if (compare_costs (act_cost, best_cost) < 0)
5820 best_cost = act_cost;
5822 if (best_delta)
5823 iv_ca_delta_free (&best_delta);
5824 best_delta = act_delta;
5826 else
5827 iv_ca_delta_free (&act_delta);
5831 iv_ca_delta_commit (data, ivs, best_delta, true);
5832 iv_ca_delta_free (&best_delta);
5834 return !infinite_cost_p (best_cost);
5837 /* Finds an initial assignment of candidates to uses. */
5839 static struct iv_ca *
5840 get_initial_solution (struct ivopts_data *data, bool originalp)
5842 struct iv_ca *ivs = iv_ca_new (data);
5843 unsigned i;
5845 for (i = 0; i < n_iv_uses (data); i++)
5846 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5848 iv_ca_free (&ivs);
5849 return NULL;
5852 return ivs;
5855 /* Tries to improve the set of induction variables IVS. */
5857 static bool
5858 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5860 unsigned i, n_ivs;
5861 comp_cost acost, best_cost = iv_ca_cost (ivs);
5862 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5863 struct iv_cand *cand;
5865 /* Try extending the set of induction variables by one. */
5866 for (i = 0; i < n_iv_cands (data); i++)
5868 cand = iv_cand (data, i);
5870 if (iv_ca_cand_used_p (ivs, cand))
5871 continue;
5873 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5874 if (!act_delta)
5875 continue;
5877 /* If we successfully added the candidate and the set is small enough,
5878 try optimizing it by removing other candidates. */
5879 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5881 iv_ca_delta_commit (data, ivs, act_delta, true);
5882 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5883 iv_ca_delta_commit (data, ivs, act_delta, false);
5884 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5887 if (compare_costs (acost, best_cost) < 0)
5889 best_cost = acost;
5890 iv_ca_delta_free (&best_delta);
5891 best_delta = act_delta;
5893 else
5894 iv_ca_delta_free (&act_delta);
5897 if (!best_delta)
5899 /* Try removing the candidates from the set instead. */
5900 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5902 /* Nothing more we can do. */
5903 if (!best_delta)
5904 return false;
5907 iv_ca_delta_commit (data, ivs, best_delta, true);
5908 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5909 iv_ca_delta_free (&best_delta);
5910 return true;
5913 /* Attempts to find the optimal set of induction variables. We use a simple
5914 greedy heuristic -- we try to replace at most one candidate in the selected
5915 solution and remove the unused ivs while this improves the cost. */
5917 static struct iv_ca *
5918 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5920 struct iv_ca *set;
5922 /* Get the initial solution. */
5923 set = get_initial_solution (data, originalp);
5924 if (!set)
5926 if (dump_file && (dump_flags & TDF_DETAILS))
5927 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5928 return NULL;
5931 if (dump_file && (dump_flags & TDF_DETAILS))
5933 fprintf (dump_file, "Initial set of candidates:\n");
5934 iv_ca_dump (data, dump_file, set);
5937 while (try_improve_iv_set (data, set))
5939 if (dump_file && (dump_flags & TDF_DETAILS))
5941 fprintf (dump_file, "Improved to:\n");
5942 iv_ca_dump (data, dump_file, set);
5946 return set;
5949 static struct iv_ca *
5950 find_optimal_iv_set (struct ivopts_data *data)
5952 unsigned i;
5953 struct iv_ca *set, *origset;
5954 struct iv_use *use;
5955 comp_cost cost, origcost;
5957 /* Determine the cost based on a strategy that starts with original IVs,
5958 and try again using a strategy that prefers candidates not based
5959 on any IVs. */
5960 origset = find_optimal_iv_set_1 (data, true);
5961 set = find_optimal_iv_set_1 (data, false);
5963 if (!origset && !set)
5964 return NULL;
5966 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5967 cost = set ? iv_ca_cost (set) : infinite_cost;
5969 if (dump_file && (dump_flags & TDF_DETAILS))
5971 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5972 origcost.cost, origcost.complexity);
5973 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5974 cost.cost, cost.complexity);
5977 /* Choose the one with the best cost. */
5978 if (compare_costs (origcost, cost) <= 0)
5980 if (set)
5981 iv_ca_free (&set);
5982 set = origset;
5984 else if (origset)
5985 iv_ca_free (&origset);
5987 for (i = 0; i < n_iv_uses (data); i++)
5989 use = iv_use (data, i);
5990 use->selected = iv_ca_cand_for_use (set, use)->cand;
5993 return set;
5996 /* Creates a new induction variable corresponding to CAND. */
5998 static void
5999 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6001 gimple_stmt_iterator incr_pos;
6002 tree base;
6003 bool after = false;
6005 if (!cand->iv)
6006 return;
6008 switch (cand->pos)
6010 case IP_NORMAL:
6011 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6012 break;
6014 case IP_END:
6015 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6016 after = true;
6017 break;
6019 case IP_AFTER_USE:
6020 after = true;
6021 /* fall through */
6022 case IP_BEFORE_USE:
6023 incr_pos = gsi_for_stmt (cand->incremented_at);
6024 break;
6026 case IP_ORIGINAL:
6027 /* Mark that the iv is preserved. */
6028 name_info (data, cand->var_before)->preserve_biv = true;
6029 name_info (data, cand->var_after)->preserve_biv = true;
6031 /* Rewrite the increment so that it uses var_before directly. */
6032 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6033 return;
6036 gimple_add_tmp_var (cand->var_before);
6038 base = unshare_expr (cand->iv->base);
6040 create_iv (base, unshare_expr (cand->iv->step),
6041 cand->var_before, data->current_loop,
6042 &incr_pos, after, &cand->var_before, &cand->var_after);
6045 /* Creates new induction variables described in SET. */
6047 static void
6048 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6050 unsigned i;
6051 struct iv_cand *cand;
6052 bitmap_iterator bi;
6054 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6056 cand = iv_cand (data, i);
6057 create_new_iv (data, cand);
6060 if (dump_file && (dump_flags & TDF_DETAILS))
6062 fprintf (dump_file, "\nSelected IV set: \n");
6063 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6065 cand = iv_cand (data, i);
6066 dump_cand (dump_file, cand);
6068 fprintf (dump_file, "\n");
6072 /* Rewrites USE (definition of iv used in a nonlinear expression)
6073 using candidate CAND. */
6075 static void
6076 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6077 struct iv_use *use, struct iv_cand *cand)
6079 tree comp;
6080 tree op, tgt;
6081 gimple ass;
6082 gimple_stmt_iterator bsi;
6084 /* An important special case -- if we are asked to express the value of
6085 the original iv by itself, just exit; there is no need to
6086 introduce a new computation (that might also need casting the
6087 variable to unsigned and back). */
6088 if (cand->pos == IP_ORIGINAL
6089 && cand->incremented_at == use->stmt)
6091 tree step, ctype, utype;
6092 enum tree_code incr_code = PLUS_EXPR, old_code;
6094 gcc_assert (is_gimple_assign (use->stmt));
6095 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6097 step = cand->iv->step;
6098 ctype = TREE_TYPE (step);
6099 utype = TREE_TYPE (cand->var_after);
6100 if (TREE_CODE (step) == NEGATE_EXPR)
6102 incr_code = MINUS_EXPR;
6103 step = TREE_OPERAND (step, 0);
6106 /* Check whether we may leave the computation unchanged.
6107 This is the case only if it does not rely on other
6108 computations in the loop -- otherwise, the computation
6109 we rely upon may be removed in remove_unused_ivs,
6110 thus leading to ICE. */
6111 old_code = gimple_assign_rhs_code (use->stmt);
6112 if (old_code == PLUS_EXPR
6113 || old_code == MINUS_EXPR
6114 || old_code == POINTER_PLUS_EXPR)
6116 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6117 op = gimple_assign_rhs2 (use->stmt);
6118 else if (old_code != MINUS_EXPR
6119 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6120 op = gimple_assign_rhs1 (use->stmt);
6121 else
6122 op = NULL_TREE;
6124 else
6125 op = NULL_TREE;
6127 if (op
6128 && (TREE_CODE (op) == INTEGER_CST
6129 || operand_equal_p (op, step, 0)))
6130 return;
6132 /* Otherwise, add the necessary computations to express
6133 the iv. */
6134 op = fold_convert (ctype, cand->var_before);
6135 comp = fold_convert (utype,
6136 build2 (incr_code, ctype, op,
6137 unshare_expr (step)));
6139 else
6141 comp = get_computation (data->current_loop, use, cand);
6142 gcc_assert (comp != NULL_TREE);
6145 switch (gimple_code (use->stmt))
6147 case GIMPLE_PHI:
6148 tgt = PHI_RESULT (use->stmt);
6150 /* If we should keep the biv, do not replace it. */
6151 if (name_info (data, tgt)->preserve_biv)
6152 return;
6154 bsi = gsi_after_labels (gimple_bb (use->stmt));
6155 break;
6157 case GIMPLE_ASSIGN:
6158 tgt = gimple_assign_lhs (use->stmt);
6159 bsi = gsi_for_stmt (use->stmt);
6160 break;
6162 default:
6163 gcc_unreachable ();
6166 if (!valid_gimple_rhs_p (comp)
6167 || (gimple_code (use->stmt) != GIMPLE_PHI
6168 /* We can't allow re-allocating the stmt as it might be pointed
6169 to still. */
6170 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6171 >= gimple_num_ops (gsi_stmt (bsi)))))
6173 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6174 true, GSI_SAME_STMT);
6175 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6177 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6178 /* As this isn't a plain copy we have to reset alignment
6179 information. */
6180 if (SSA_NAME_PTR_INFO (comp))
6181 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6185 if (gimple_code (use->stmt) == GIMPLE_PHI)
6187 ass = gimple_build_assign (tgt, comp);
6188 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6190 bsi = gsi_for_stmt (use->stmt);
6191 remove_phi_node (&bsi, false);
6193 else
6195 gimple_assign_set_rhs_from_tree (&bsi, comp);
6196 use->stmt = gsi_stmt (bsi);
6200 /* Performs a peephole optimization to reorder the iv update statement with
6201 a mem ref to enable instruction combining in later phases. The mem ref uses
6202 the iv value before the update, so the reordering transformation requires
6203 adjustment of the offset. CAND is the selected IV_CAND.
6205 Example:
6207 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6208 iv2 = iv1 + 1;
6210 if (t < val) (1)
6211 goto L;
6212 goto Head;
6215 directly propagating t over to (1) will introduce an overlapping live range,
6216 thus increasing register pressure. This peephole transforms it into:
6219 iv2 = iv1 + 1;
6220 t = MEM_REF (base, iv2, 8, 8);
6221 if (t < val)
6222 goto L;
6223 goto Head;
6226 static void
6227 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6229 tree var_after;
6230 gimple iv_update, stmt;
6231 basic_block bb;
6232 gimple_stmt_iterator gsi, gsi_iv;
6234 if (cand->pos != IP_NORMAL)
6235 return;
6237 var_after = cand->var_after;
6238 iv_update = SSA_NAME_DEF_STMT (var_after);
6240 bb = gimple_bb (iv_update);
6241 gsi = gsi_last_nondebug_bb (bb);
6242 stmt = gsi_stmt (gsi);
6244 /* Only handle conditional statement for now. */
6245 if (gimple_code (stmt) != GIMPLE_COND)
6246 return;
6248 gsi_prev_nondebug (&gsi);
6249 stmt = gsi_stmt (gsi);
6250 if (stmt != iv_update)
6251 return;
6253 gsi_prev_nondebug (&gsi);
6254 if (gsi_end_p (gsi))
6255 return;
6257 stmt = gsi_stmt (gsi);
6258 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6259 return;
6261 if (stmt != use->stmt)
6262 return;
6264 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6265 return;
6267 if (dump_file && (dump_flags & TDF_DETAILS))
6269 fprintf (dump_file, "Reordering \n");
6270 print_gimple_stmt (dump_file, iv_update, 0, 0);
6271 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6272 fprintf (dump_file, "\n");
6275 gsi = gsi_for_stmt (use->stmt);
6276 gsi_iv = gsi_for_stmt (iv_update);
6277 gsi_move_before (&gsi_iv, &gsi);
6279 cand->pos = IP_BEFORE_USE;
6280 cand->incremented_at = use->stmt;
/* Rewrites USE (an address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
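
  /* Illustrative example (the names and exact form are made up): if CAND
     is a pointer-based IV whose value at USE->stmt is ivtmp_5, AFF might
     be `(unsigned long) ivtmp_5 + 8', and passing ivtmp_5 as the hint
     lets create_mem_ref produce something like MEM[ivtmp_5 + 8B] rather
     than a reference based on an integer expression.  */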
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv)
   using candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);
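
      /* Rewrite the exit test in place to compare the candidate against
         the precomputed bound.  */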
      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv (general induction variable).  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;
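
          /* Before DEF is actually released, try to rewrite any debug
             uses of it in terms of a surviving candidate, so that the
             value stays available to the debugger.  */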
          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
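
              /* Scan the selected candidates (capped at 64 uses) for the
                 one that best matches DEF.  The weights 4, 2 and 1 below
                 implement a lexicographic preference: a matching step
                 outranks a matching machine mode, which outranks a
                 constant base, since no combination of the lower weights
                 can exceed a higher one.  */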
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
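
                  /* Bind the computed expression to the artificial
                     DEBUG_EXPR_DECL and insert the bind at the definition
                     point (after the labels, for a PHI), so every debug
                     use below can refer to the temporary.  */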
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp,
                                                             NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);
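
  /* The SSA name space may have grown while this loop was processed;
     if so, reallocate version_info with room to spare so the next loop
     sees a sufficiently large zero-initialized array.  */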
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  htab_delete (data->inv_expr_tab);
}
/* Returns true if the loop body BODY (consisting of NUM_NODES blocks)
   includes any function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each SSA name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;
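
  /* The cut-off above is presumably a compile-time safeguard: with more
     than MAX_CONSIDERED_USES uses, selecting a candidate set below would
     be too expensive.  */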
  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}