/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

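/* As an illustrative sketch (not part of the original sources): for a
   simple loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   step 1 records the address use a[i], step 2 adds candidates such as
   the original biv i and a pointer iv with base &a[0] and step
   sizeof (a[0]), and if step 3 selects the pointer candidate, step 4
   rewrites the loop along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which is the classical strength-reduction result mentioned above.  */
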
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists. */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

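/* For illustration (not part of the original sources): if profile data
   estimates that the loop body executes about 100 times,
   estimated_stmt_executions_int returns 100 and so does this function;
   without profile feedback it returns -1 and the fixed guess
   AVG_LOOP_NITER, i.e. 5, is used instead.  */
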
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

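/* For illustration (not part of the original sources): given an address
   such as &a[0] + i * 4 (a POINTER_PLUS_EXPR), the function recurses into
   the &a[0] operand, whose ADDR_EXPR case resolves through
   get_base_address to the declaration 'a', so the returned base object
   is &a converted to ptr_type_node.  A constant pointer such as
   (char *) 0 is an INTEGER_CST and yields NULL_TREE instead.  */
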
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}

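/* For illustration (not part of the original sources): with
   TOP = x * 6 - x * 2 and BOT = x, the MINUS_EXPR case recurses into
   both operands, the MULT_EXPR case yields the constants 6 and 2, and
   the value stored in MUL is 4.  With TOP = x + 1 and BOT = x, the
   recursion on the operand 1 reaches the INTEGER_CST case with a
   non-constant BOT and fails, so false is returned.  */
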
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

2211 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2213 static tree
2214 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2216 HOST_WIDE_INT off;
2217 tree core = strip_offset_1 (expr, false, false, &off);
2218 *offset = off;
2219 return core;
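/* Illustration (hypothetical, assuming 4-byte ints): for an expression
   such as &a[i + 4], strip_offset returns the core &a[i] and stores 16
   in *OFFSET, the constant part of the index scaled by the element
   size.  */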
2222 /* Returns a variant of TYPE that can be used as a base for different uses.
2223 We return an unsigned type with the same precision, which avoids problems
2224 with overflows. */
2226 static tree
2227 generic_type_for (tree type)
2229 if (POINTER_TYPE_P (type))
2230 return unsigned_type_for (type);
2232 if (TYPE_UNSIGNED (type))
2233 return type;
2235 return unsigned_type_for (type);
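/* For instance, "int" maps to "unsigned int", a pointer type maps to
   the unsigned integer type of the same precision, and a type that is
   already unsigned is returned unchanged.  */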
2238 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2239 the bitmap in which we should store them. */
2241 static struct ivopts_data *fd_ivopts_data;
2242 static tree
2243 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2245 bitmap *depends_on = (bitmap *) data;
2246 struct version_info *info;
2248 if (TREE_CODE (*expr_p) != SSA_NAME)
2249 return NULL_TREE;
2250 info = name_info (fd_ivopts_data, *expr_p);
2252 if (!info->inv_id || info->has_nonlin_use)
2253 return NULL_TREE;
2255 if (!*depends_on)
2256 *depends_on = BITMAP_ALLOC (NULL);
2257 bitmap_set_bit (*depends_on, info->inv_id);
2259 return NULL_TREE;
2262 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2263 position to POS. If USE is not NULL, the candidate is set as related to
2264 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2265 replacement of the final value of the iv by a direct computation. */
2267 static struct iv_cand *
2268 add_candidate_1 (struct ivopts_data *data,
2269 tree base, tree step, bool important, enum iv_position pos,
2270 struct iv_use *use, gimple incremented_at)
2272 unsigned i;
2273 struct iv_cand *cand = NULL;
2274 tree type, orig_type;
2276 /* For non-original variables, make sure their values are computed in a type
2277 that does not invoke undefined behavior on overflows (since in general,
2278 we cannot prove that these induction variables are non-wrapping). */
2279 if (pos != IP_ORIGINAL)
2281 orig_type = TREE_TYPE (base);
2282 type = generic_type_for (orig_type);
2283 if (type != orig_type)
2285 base = fold_convert (type, base);
2286 step = fold_convert (type, step);
2290 for (i = 0; i < n_iv_cands (data); i++)
2292 cand = iv_cand (data, i);
2294 if (cand->pos != pos)
2295 continue;
2297 if (cand->incremented_at != incremented_at
2298 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2299 && cand->ainc_use != use))
2300 continue;
2302 if (!cand->iv)
2304 if (!base && !step)
2305 break;
2307 continue;
2310 if (!base && !step)
2311 continue;
2313 if (operand_equal_p (base, cand->iv->base, 0)
2314 && operand_equal_p (step, cand->iv->step, 0)
2315 && (TYPE_PRECISION (TREE_TYPE (base))
2316 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2317 break;
2320 if (i == n_iv_cands (data))
2322 cand = XCNEW (struct iv_cand);
2323 cand->id = i;
2325 if (!base && !step)
2326 cand->iv = NULL;
2327 else
2328 cand->iv = alloc_iv (base, step);
2330 cand->pos = pos;
2331 if (pos != IP_ORIGINAL && cand->iv)
2333 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2334 cand->var_after = cand->var_before;
2336 cand->important = important;
2337 cand->incremented_at = incremented_at;
2338 data->iv_candidates.safe_push (cand);
2340 if (step
2341 && TREE_CODE (step) != INTEGER_CST)
2343 fd_ivopts_data = data;
2344 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2347 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2348 cand->ainc_use = use;
2349 else
2350 cand->ainc_use = NULL;
2352 if (dump_file && (dump_flags & TDF_DETAILS))
2353 dump_cand (dump_file, cand);
2356 if (important && !cand->important)
2358 cand->important = true;
2359 if (dump_file && (dump_flags & TDF_DETAILS))
2360 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2363 if (use)
2365 bitmap_set_bit (use->related_cands, i);
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2367 fprintf (dump_file, "Candidate %d is related to use %d\n",
2368 cand->id, use->id);
2371 return cand;
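/* Illustration: in a loop like "for (i = 0; i < n; i++) sum += a[i];"
   both the exit test and the address &a[i] may propose the candidate
   {base 0, step 1}; the search loop above ensures only one copy is
   recorded, while later calls merely update the important flag and the
   related_cands bitmaps.  */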
2374 /* Returns true if incrementing the induction variable at the end of the LOOP
2375 is allowed.
2377 The purpose is to avoid splitting the latch edge with a biv increment, thus
2378 creating a jump, possibly confusing other optimization passes and leaving
2379 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2380 is not available (so we do not have a better alternative), or if the latch
2381 edge is already nonempty. */
2383 static bool
2384 allow_ip_end_pos_p (struct loop *loop)
2386 if (!ip_normal_pos (loop))
2387 return true;
2389 if (!empty_block_p (ip_end_pos (loop)))
2390 return true;
2392 return false;
2395 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2396 Important field is set to IMPORTANT. */
2398 static void
2399 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2400 bool important, struct iv_use *use)
2402 basic_block use_bb = gimple_bb (use->stmt);
2403 enum machine_mode mem_mode;
2404 unsigned HOST_WIDE_INT cstepi;
2406 /* If we insert the increment in any position other than the standard
2407 ones, we must ensure that it is incremented once per iteration.
2408 It must not be in an inner nested loop, or on one side of an if
2409 statement. */
2410 if (use_bb->loop_father != data->current_loop
2411 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2412 || stmt_could_throw_p (use->stmt)
2413 || !cst_and_fits_in_hwi (step))
2414 return;
2416 cstepi = int_cst_value (step);
2418 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2419 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2420 || USE_STORE_PRE_INCREMENT (mem_mode))
2421 && GET_MODE_SIZE (mem_mode) == cstepi)
2422 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2423 || USE_STORE_PRE_DECREMENT (mem_mode))
2424 && GET_MODE_SIZE (mem_mode) == -cstepi))
2426 enum tree_code code = MINUS_EXPR;
2427 tree new_base;
2428 tree new_step = step;
2430 if (POINTER_TYPE_P (TREE_TYPE (base)))
2432 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2433 code = POINTER_PLUS_EXPR;
2435 else
2436 new_step = fold_convert (TREE_TYPE (base), new_step);
2437 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2438 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2439 use->stmt);
2441 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2442 || USE_STORE_POST_INCREMENT (mem_mode))
2443 && GET_MODE_SIZE (mem_mode) == cstepi)
2444 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2445 || USE_STORE_POST_DECREMENT (mem_mode))
2446 && GET_MODE_SIZE (mem_mode) == -cstepi))
2448 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2449 use->stmt);
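/* Illustration (hypothetical, for a target with post-increment
   addressing): a load from *p with step 4 and GET_MODE_SIZE (SImode)
   == 4 gets an IP_AFTER_USE candidate, so the increment can be placed
   right after the access and combined into a POST_INC address, the RTL
   equivalent of "*p++".  */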
2453 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2454 position to POS. If USE is not NULL, the candidate is set as related to
2455 it. The candidate computation is scheduled on all available positions. */
2457 static void
2458 add_candidate (struct ivopts_data *data,
2459 tree base, tree step, bool important, struct iv_use *use)
2461 if (ip_normal_pos (data->current_loop))
2462 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2463 if (ip_end_pos (data->current_loop)
2464 && allow_ip_end_pos_p (data->current_loop))
2465 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2467 if (use != NULL && use->type == USE_ADDRESS)
2468 add_autoinc_candidates (data, base, step, important, use);
2471 /* Adds standard iv candidates. */
2473 static void
2474 add_standard_iv_candidates (struct ivopts_data *data)
2476 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2478 /* The same for the "long" type if it is still fast enough. */
2479 if (TYPE_PRECISION
2480 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2481 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2482 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2483 build_int_cst (long_integer_type_node, 1), true, NULL);
2485 /* And for the "long long" type, again if it is still fast enough. */
2486 if (TYPE_PRECISION
2487 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2488 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2489 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2490 build_int_cst (long_long_integer_type_node, 1), true, NULL);
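/* For instance, on a typical LP64 target this adds 0 + 1 * i in both
   "int" and "long" (precision 32 and 64); the "long long" candidate is
   then skipped, since its precision equals that of "long".  */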
2494 /* Adds candidates based on the old induction variable IV. */
2496 static void
2497 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2499 gimple phi;
2500 tree def;
2501 struct iv_cand *cand;
2503 add_candidate (data, iv->base, iv->step, true, NULL);
2505 /* The same, but with initial value zero. */
2506 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2507 add_candidate (data, size_int (0), iv->step, true, NULL);
2508 else
2509 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2510 iv->step, true, NULL);
2512 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2513 if (gimple_code (phi) == GIMPLE_PHI)
2515 /* Additionally record the possibility of leaving the original iv
2516 untouched. */
2517 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2518 /* Don't add candidate if it's from another PHI node because
2519 it's an affine iv appearing in the form of PEELED_CHREC. */
2520 phi = SSA_NAME_DEF_STMT (def);
2521 if (gimple_code (phi) != GIMPLE_PHI)
2523 cand = add_candidate_1 (data,
2524 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2525 SSA_NAME_DEF_STMT (def));
2526 cand->var_before = iv->ssa_name;
2527 cand->var_after = def;
2529 else
2530 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2534 /* Adds candidates based on the old induction variables. */
2536 static void
2537 add_old_ivs_candidates (struct ivopts_data *data)
2539 unsigned i;
2540 struct iv *iv;
2541 bitmap_iterator bi;
2543 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2545 iv = ver_info (data, i)->iv;
2546 if (iv && iv->biv_p && !integer_zerop (iv->step))
2547 add_old_iv_candidates (data, iv);
2551 /* Adds candidates based on the value of the induction variable IV and USE. */
2553 static void
2554 add_iv_value_candidates (struct ivopts_data *data,
2555 struct iv *iv, struct iv_use *use)
2557 unsigned HOST_WIDE_INT offset;
2558 tree base;
2559 tree basetype;
2561 add_candidate (data, iv->base, iv->step, false, use);
2563 /* The same, but with initial value zero. Make such a variable important,
2564 since it is generic enough that many uses may possibly be based
2565 on it. */
2566 basetype = TREE_TYPE (iv->base);
2567 if (POINTER_TYPE_P (basetype))
2568 basetype = sizetype;
2569 add_candidate (data, build_int_cst (basetype, 0),
2570 iv->step, true, use);
2572 /* Third, try removing the constant offset. Make sure to even
2573 add a candidate for &a[0] vs. (T *)&a. */
2574 base = strip_offset (iv->base, &offset);
2575 if (offset
2576 || base != iv->base)
2577 add_candidate (data, base, iv->step, false, use);
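/* Illustration (hypothetical, assuming 4-byte elements): for a use
   with iv {base &a[16], step 4}, the calls above add {&a[16], 4}, the
   important generic {0, 4}, and finally the stripped {(T *)&a, 4},
   with the constant offset 64 peeled off by strip_offset.  */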
2580 /* Adds candidates based on the uses. */
2582 static void
2583 add_derived_ivs_candidates (struct ivopts_data *data)
2585 unsigned i;
2587 for (i = 0; i < n_iv_uses (data); i++)
2589 struct iv_use *use = iv_use (data, i);
2591 if (!use)
2592 continue;
2594 switch (use->type)
2596 case USE_NONLINEAR_EXPR:
2597 case USE_COMPARE:
2598 case USE_ADDRESS:
2599 /* Just add the ivs based on the value of the iv used here. */
2600 add_iv_value_candidates (data, use->iv, use);
2601 break;
2603 default:
2604 gcc_unreachable ();
2609 /* Record important candidates and add them to related_cands bitmaps
2610 if needed. */
2612 static void
2613 record_important_candidates (struct ivopts_data *data)
2615 unsigned i;
2616 struct iv_use *use;
2618 for (i = 0; i < n_iv_cands (data); i++)
2620 struct iv_cand *cand = iv_cand (data, i);
2622 if (cand->important)
2623 bitmap_set_bit (data->important_candidates, i);
2626 data->consider_all_candidates = (n_iv_cands (data)
2627 <= CONSIDER_ALL_CANDIDATES_BOUND);
2629 if (data->consider_all_candidates)
2631 /* We will not need "related_cands" bitmaps in this case,
2632 so release them to decrease peak memory consumption. */
2633 for (i = 0; i < n_iv_uses (data); i++)
2635 use = iv_use (data, i);
2636 BITMAP_FREE (use->related_cands);
2639 else
2641 /* Add important candidates to the related_cands bitmaps. */
2642 for (i = 0; i < n_iv_uses (data); i++)
2643 bitmap_ior_into (iv_use (data, i)->related_cands,
2644 data->important_candidates);
2648 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2649 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2650 we allocate a simple list for every use.
2652 static void
2653 alloc_use_cost_map (struct ivopts_data *data)
2655 unsigned i, size, s;
2657 for (i = 0; i < n_iv_uses (data); i++)
2659 struct iv_use *use = iv_use (data, i);
2661 if (data->consider_all_candidates)
2662 size = n_iv_cands (data);
2663 else
2665 s = bitmap_count_bits (use->related_cands);
2667 /* Round up to a power of two, so that computing the modulo by it is fast. */
2668 size = s ? (1 << ceil_log2 (s)) : 1;
2671 use->n_map_members = size;
2672 use->cost_map = XCNEWVEC (struct cost_pair, size);
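/* For instance, a use related to 5 candidates gets a map of size 8,
   the next power of two, so that set_use_iv_cost and get_use_iv_cost
   below can reduce a candidate id modulo the size with a simple
   mask.  */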
2676 /* Returns description of computation cost of expression whose runtime
2677 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2679 static comp_cost
2680 new_cost (unsigned runtime, unsigned complexity)
2682 comp_cost cost;
2684 cost.cost = runtime;
2685 cost.complexity = complexity;
2687 return cost;
2690 /* Adds costs COST1 and COST2. */
2692 static comp_cost
2693 add_costs (comp_cost cost1, comp_cost cost2)
2695 cost1.cost += cost2.cost;
2696 cost1.complexity += cost2.complexity;
2698 return cost1;
2700 /* Subtracts costs COST1 and COST2. */
2702 static comp_cost
2703 sub_costs (comp_cost cost1, comp_cost cost2)
2705 cost1.cost -= cost2.cost;
2706 cost1.complexity -= cost2.complexity;
2708 return cost1;
2711 /* Returns a negative number if COST1 < COST2, a positive number if
2712 COST1 > COST2, and 0 if COST1 = COST2. */
2714 static int
2715 compare_costs (comp_cost cost1, comp_cost cost2)
2717 if (cost1.cost == cost2.cost)
2718 return cost1.complexity - cost2.complexity;
2720 return cost1.cost - cost2.cost;
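/* For example, {cost 4, complexity 1} compares less than {cost 4,
   complexity 2} but greater than {cost 3, complexity 7}: complexity
   only breaks ties in the runtime cost.  */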
2723 /* Returns true if COST is infinite. */
2725 static bool
2726 infinite_cost_p (comp_cost cost)
2728 return cost.cost == INFTY;
2731 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2732 on invariants DEPENDS_ON and that the value used in expressing it
2733 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2735 static void
2736 set_use_iv_cost (struct ivopts_data *data,
2737 struct iv_use *use, struct iv_cand *cand,
2738 comp_cost cost, bitmap depends_on, tree value,
2739 enum tree_code comp, int inv_expr_id)
2741 unsigned i, s;
2743 if (infinite_cost_p (cost))
2745 BITMAP_FREE (depends_on);
2746 return;
2749 if (data->consider_all_candidates)
2751 use->cost_map[cand->id].cand = cand;
2752 use->cost_map[cand->id].cost = cost;
2753 use->cost_map[cand->id].depends_on = depends_on;
2754 use->cost_map[cand->id].value = value;
2755 use->cost_map[cand->id].comp = comp;
2756 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2757 return;
2760 /* n_map_members is a power of two, so this computes modulo. */
2761 s = cand->id & (use->n_map_members - 1);
2762 for (i = s; i < use->n_map_members; i++)
2763 if (!use->cost_map[i].cand)
2764 goto found;
2765 for (i = 0; i < s; i++)
2766 if (!use->cost_map[i].cand)
2767 goto found;
2769 gcc_unreachable ();
2771 found:
2772 use->cost_map[i].cand = cand;
2773 use->cost_map[i].cost = cost;
2774 use->cost_map[i].depends_on = depends_on;
2775 use->cost_map[i].value = value;
2776 use->cost_map[i].comp = comp;
2777 use->cost_map[i].inv_expr_id = inv_expr_id;
2780 /* Gets cost of (USE, CANDIDATE) pair. */
2782 static struct cost_pair *
2783 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2784 struct iv_cand *cand)
2786 unsigned i, s;
2787 struct cost_pair *ret;
2789 if (!cand)
2790 return NULL;
2792 if (data->consider_all_candidates)
2794 ret = use->cost_map + cand->id;
2795 if (!ret->cand)
2796 return NULL;
2798 return ret;
2801 /* n_map_members is a power of two, so this computes modulo. */
2802 s = cand->id & (use->n_map_members - 1);
2803 for (i = s; i < use->n_map_members; i++)
2804 if (use->cost_map[i].cand == cand)
2805 return use->cost_map + i;
2806 else if (use->cost_map[i].cand == NULL)
2807 return NULL;
2808 for (i = 0; i < s; i++)
2809 if (use->cost_map[i].cand == cand)
2810 return use->cost_map + i;
2811 else if (use->cost_map[i].cand == NULL)
2812 return NULL;
2814 return NULL;
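/* Together with set_use_iv_cost above, this implements a small
   open-addressing hash table with linear probing: the candidate id
   masked by n_map_members - 1 selects the initial slot, and a NULL
   cand field terminates the probe sequence.  */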
2817 /* Returns an estimate of the cost of computing SEQ. */
2819 static unsigned
2820 seq_cost (rtx seq, bool speed)
2822 unsigned cost = 0;
2823 rtx set;
2825 for (; seq; seq = NEXT_INSN (seq))
2827 set = single_set (seq);
2828 if (set)
2829 cost += set_src_cost (SET_SRC (set), speed);
2830 else
2831 cost++;
2834 return cost;
2837 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2838 static rtx
2839 produce_memory_decl_rtl (tree obj, int *regno)
2841 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2842 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2843 rtx x;
2845 gcc_assert (obj);
2846 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2848 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2849 x = gen_rtx_SYMBOL_REF (address_mode, name);
2850 SET_SYMBOL_REF_DECL (x, obj);
2851 x = gen_rtx_MEM (DECL_MODE (obj), x);
2852 set_mem_addr_space (x, as);
2853 targetm.encode_section_info (obj, x, true);
2855 else
2857 x = gen_raw_REG (address_mode, (*regno)++);
2858 x = gen_rtx_MEM (DECL_MODE (obj), x);
2859 set_mem_addr_space (x, as);
2862 return x;
2865 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2866 walk_tree. DATA contains the actual fake register number. */
2868 static tree
2869 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2871 tree obj = NULL_TREE;
2872 rtx x = NULL_RTX;
2873 int *regno = (int *) data;
2875 switch (TREE_CODE (*expr_p))
2877 case ADDR_EXPR:
2878 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2879 handled_component_p (*expr_p);
2880 expr_p = &TREE_OPERAND (*expr_p, 0))
2881 continue;
2882 obj = *expr_p;
2883 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2884 x = produce_memory_decl_rtl (obj, regno);
2885 break;
2887 case SSA_NAME:
2888 *ws = 0;
2889 obj = SSA_NAME_VAR (*expr_p);
2890 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2891 if (!obj)
2892 return NULL_TREE;
2893 if (!DECL_RTL_SET_P (obj))
2894 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2895 break;
2897 case VAR_DECL:
2898 case PARM_DECL:
2899 case RESULT_DECL:
2900 *ws = 0;
2901 obj = *expr_p;
2903 if (DECL_RTL_SET_P (obj))
2904 break;
2906 if (DECL_MODE (obj) == BLKmode)
2907 x = produce_memory_decl_rtl (obj, regno);
2908 else
2909 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2911 break;
2913 default:
2914 break;
2917 if (x)
2919 decl_rtl_to_reset.safe_push (obj);
2920 SET_DECL_RTL (obj, x);
2923 return NULL_TREE;
2926 /* Determines cost of the computation of EXPR. */
2928 static unsigned
2929 computation_cost (tree expr, bool speed)
2931 rtx seq, rslt;
2932 tree type = TREE_TYPE (expr);
2933 unsigned cost;
2934 /* Avoid using hard regs in ways which may be unsupported. */
2935 int regno = LAST_VIRTUAL_REGISTER + 1;
2936 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2937 enum node_frequency real_frequency = node->frequency;
2939 node->frequency = NODE_FREQUENCY_NORMAL;
2940 crtl->maybe_hot_insn_p = speed;
2941 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2942 start_sequence ();
2943 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2944 seq = get_insns ();
2945 end_sequence ();
2946 default_rtl_profile ();
2947 node->frequency = real_frequency;
2949 cost = seq_cost (seq, speed);
2950 if (MEM_P (rslt))
2951 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2952 TYPE_ADDR_SPACE (type), speed);
2953 else if (!REG_P (rslt))
2954 cost += set_src_cost (rslt, speed);
2956 return cost;
2959 /* Returns variable containing the value of candidate CAND at statement AT. */
2961 static tree
2962 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2964 if (stmt_after_increment (loop, cand, stmt))
2965 return cand->var_after;
2966 else
2967 return cand->var_before;
2970 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2971 same precision that is at least as wide as the precision of TYPE, stores
2972 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2973 type of A and B. */
2975 static tree
2976 determine_common_wider_type (tree *a, tree *b)
2978 tree wider_type = NULL;
2979 tree suba, subb;
2980 tree atype = TREE_TYPE (*a);
2982 if (CONVERT_EXPR_P (*a))
2984 suba = TREE_OPERAND (*a, 0);
2985 wider_type = TREE_TYPE (suba);
2986 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2987 return atype;
2989 else
2990 return atype;
2992 if (CONVERT_EXPR_P (*b))
2994 subb = TREE_OPERAND (*b, 0);
2995 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2996 return atype;
2998 else
2999 return atype;
3001 *a = suba;
3002 *b = subb;
3003 return wider_type;
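/* Illustration: if *A is (int) x and *B is (int) y with x and y of
   type "long", A and B are replaced by x and y and "long" is returned,
   so the caller can fold their difference in the wider type.  */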
3006 /* Determines the expression by which USE is expressed from induction variable
3007 CAND at statement AT in LOOP. The expression is stored in a decomposed
3008 form into AFF. Returns false if USE cannot be expressed using CAND. */
3010 static bool
3011 get_computation_aff (struct loop *loop,
3012 struct iv_use *use, struct iv_cand *cand, gimple at,
3013 struct aff_tree *aff)
3015 tree ubase = use->iv->base;
3016 tree ustep = use->iv->step;
3017 tree cbase = cand->iv->base;
3018 tree cstep = cand->iv->step, cstep_common;
3019 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3020 tree common_type, var;
3021 tree uutype;
3022 aff_tree cbase_aff, var_aff;
3023 double_int rat;
3025 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3027 /* We do not have enough precision to express the values of the use. */
3028 return false;
3031 var = var_at_stmt (loop, cand, at);
3032 uutype = unsigned_type_for (utype);
3034 /* If the conversion is not noop, perform it. */
3035 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3037 cstep = fold_convert (uutype, cstep);
3038 cbase = fold_convert (uutype, cbase);
3039 var = fold_convert (uutype, var);
3042 if (!constant_multiple_of (ustep, cstep, &rat))
3043 return false;
3045 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3046 type, we achieve better folding by computing their difference in this
3047 wider type, and casting the result to UUTYPE. We do not need to worry about
3048 overflows, as all the arithmetic will in the end be performed in UUTYPE
3049 anyway. */
3050 common_type = determine_common_wider_type (&ubase, &cbase);
3052 /* use = ubase - ratio * cbase + ratio * var. */
3053 tree_to_aff_combination (ubase, common_type, aff);
3054 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3055 tree_to_aff_combination (var, uutype, &var_aff);
3057 /* We need to shift the value if we are after the increment. */
3058 if (stmt_after_increment (loop, cand, at))
3060 aff_tree cstep_aff;
3062 if (common_type != uutype)
3063 cstep_common = fold_convert (common_type, cstep);
3064 else
3065 cstep_common = cstep;
3067 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3068 aff_combination_add (&cbase_aff, &cstep_aff);
3071 aff_combination_scale (&cbase_aff, -rat);
3072 aff_combination_add (aff, &cbase_aff);
3073 if (common_type != uutype)
3074 aff_combination_convert (aff, uutype);
3076 aff_combination_scale (&var_aff, rat);
3077 aff_combination_add (aff, &var_aff);
3079 return true;
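/* Worked instance (hypothetical): for use {base 0, step 2} and
   candidate {base 0, step 1}, constant_multiple_of yields rat == 2 and
   the combination becomes 0 - 2 * 0 + 2 * var, i.e. the use is simply
   twice the candidate variable.  */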
3082 /* Return the type of USE. */
3084 static tree
3085 get_use_type (struct iv_use *use)
3087 tree base_type = TREE_TYPE (use->iv->base);
3088 tree type;
3090 if (use->type == USE_ADDRESS)
3092 /* The base_type may be a void pointer. Create a pointer type based on
3093 the mem_ref instead. */
3094 type = build_pointer_type (TREE_TYPE (*use->op_p));
3095 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3096 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3098 else
3099 type = base_type;
3101 return type;
3104 /* Determines the expression by which USE is expressed from induction variable
3105 CAND at statement AT in LOOP. The computation is unshared. */
3107 static tree
3108 get_computation_at (struct loop *loop,
3109 struct iv_use *use, struct iv_cand *cand, gimple at)
3111 aff_tree aff;
3112 tree type = get_use_type (use);
3114 if (!get_computation_aff (loop, use, cand, at, &aff))
3115 return NULL_TREE;
3116 unshare_aff_combination (&aff);
3117 return fold_convert (type, aff_combination_to_tree (&aff));
3120 /* Determines the expression by which USE is expressed from induction variable
3121 CAND in LOOP. The computation is unshared. */
3123 static tree
3124 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3126 return get_computation_at (loop, use, cand, use->stmt);
3129 /* Adjust the cost COST for being in loop setup rather than loop body.
3130 If we're optimizing for space, the loop setup overhead is constant;
3131 if we're optimizing for speed, amortize it over the per-iteration cost. */
3132 static unsigned
3133 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3135 if (cost == INFTY)
3136 return cost;
3137 else if (optimize_loop_for_speed_p (data->current_loop))
3138 return cost / avg_loop_niter (data->current_loop);
3139 else
3140 return cost;
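/* E.g. with a setup cost of 10 and avg_loop_niter == 5, optimizing for
   speed charges only 10 / 5 == 2 per iteration, whereas when optimizing
   for size the full constant 10 is kept.  */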
3143 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3144 validity for a memory reference accessing memory of mode MODE in
3145 address space AS. */
3148 bool
3149 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3150 addr_space_t as)
3152 #define MAX_RATIO 128
3153 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3154 static vec<sbitmap> valid_mult_list;
3155 sbitmap valid_mult;
3157 if (data_index >= valid_mult_list.length ())
3158 valid_mult_list.safe_grow_cleared (data_index + 1);
3160 valid_mult = valid_mult_list[data_index];
3161 if (!valid_mult)
3163 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3164 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3165 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3166 rtx addr, scaled;
3167 HOST_WIDE_INT i;
3169 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3170 bitmap_clear (valid_mult);
3171 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3172 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3173 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3175 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3176 if (memory_address_addr_space_p (mode, addr, as)
3177 || memory_address_addr_space_p (mode, scaled, as))
3178 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3181 if (dump_file && (dump_flags & TDF_DETAILS))
3183 fprintf (dump_file, " allowed multipliers:");
3184 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3185 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3186 fprintf (dump_file, " %d", (int) i);
3187 fprintf (dump_file, "\n");
3188 fprintf (dump_file, "\n");
3191 valid_mult_list[data_index] = valid_mult;
3194 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3195 return false;
3197 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
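/* Illustration (target-dependent): on x86-like targets the valid index
   scales are typically 1, 2, 4 and 8, so ratio 4 would be accepted and
   ratio 3 rejected; the probing above derives exactly this set from the
   backend's legitimate-address predicate.  */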
3200 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3201 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3202 variable is omitted. Compute the cost for a memory reference that accesses
3203 a memory location of mode MEM_MODE in address space AS.
3205 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3206 size of MEM_MODE / RATIO) is available. To make this determination, we
3207 look at the size of the increment to be made, which is given in CSTEP.
3208 CSTEP may be zero if the step is unknown.
3209 STMT_AFTER_INC is true iff the statement we're looking at is after the
3210 increment of the original biv.
3212 TODO -- there must be some better way. This all is quite crude. */
3214 enum ainc_type
3216 AINC_PRE_INC, /* Pre increment. */
3217 AINC_PRE_DEC, /* Pre decrement. */
3218 AINC_POST_INC, /* Post increment. */
3219 AINC_POST_DEC, /* Post decrement. */
3220 AINC_NONE /* Also the number of auto increment types. */
3223 typedef struct address_cost_data_s
3225 HOST_WIDE_INT min_offset, max_offset;
3226 unsigned costs[2][2][2][2];
3227 unsigned ainc_costs[AINC_NONE];
3228 } *address_cost_data;
3231 static comp_cost
3232 get_address_cost (bool symbol_present, bool var_present,
3233 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3234 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3235 addr_space_t as, bool speed,
3236 bool stmt_after_inc, bool *may_autoinc)
3238 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3239 static vec<address_cost_data> address_cost_data_list;
3240 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3241 address_cost_data data;
3242 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3243 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3244 unsigned cost, acost, complexity;
3245 enum ainc_type autoinc_type;
3246 bool offset_p, ratio_p, autoinc;
3247 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3248 unsigned HOST_WIDE_INT mask;
3249 unsigned bits;
3251 if (data_index >= address_cost_data_list.length ())
3252 address_cost_data_list.safe_grow_cleared (data_index + 1);
3254 data = address_cost_data_list[data_index];
3255 if (!data)
3257 HOST_WIDE_INT i;
3258 HOST_WIDE_INT rat, off = 0;
3259 int old_cse_not_expected, width;
3260 unsigned sym_p, var_p, off_p, rat_p, add_c;
3261 rtx seq, addr, base;
3262 rtx reg0, reg1;
3264 data = (address_cost_data) xcalloc (1, sizeof (*data));
3266 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3268 width = GET_MODE_BITSIZE (address_mode) - 1;
3269 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3270 width = HOST_BITS_PER_WIDE_INT - 1;
3271 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3273 for (i = width; i >= 0; i--)
3275 off = -((unsigned HOST_WIDE_INT) 1 << i);
3276 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3277 if (memory_address_addr_space_p (mem_mode, addr, as))
3278 break;
3280 data->min_offset = (i == -1? 0 : off);
3282 for (i = width; i >= 0; i--)
3284 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3285 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3286 if (memory_address_addr_space_p (mem_mode, addr, as))
3287 break;
3289 if (i == -1)
3290 off = 0;
3291 data->max_offset = off;
3293 if (dump_file && (dump_flags & TDF_DETAILS))
3295 fprintf (dump_file, "get_address_cost:\n");
3296 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3297 GET_MODE_NAME (mem_mode),
3298 data->min_offset);
3299 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3300 GET_MODE_NAME (mem_mode),
3301 data->max_offset);
3304 rat = 1;
3305 for (i = 2; i <= MAX_RATIO; i++)
3306 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3308 rat = i;
3309 break;
3312 /* Compute the cost of various addressing modes. */
3313 acost = 0;
3314 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3315 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3317 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3318 || USE_STORE_PRE_DECREMENT (mem_mode))
3320 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3321 has_predec[mem_mode]
3322 = memory_address_addr_space_p (mem_mode, addr, as);
3324 if (has_predec[mem_mode])
3325 data->ainc_costs[AINC_PRE_DEC]
3326 = address_cost (addr, mem_mode, as, speed);
3328 if (USE_LOAD_POST_DECREMENT (mem_mode)
3329 || USE_STORE_POST_DECREMENT (mem_mode))
3331 addr = gen_rtx_POST_DEC (address_mode, reg0);
3332 has_postdec[mem_mode]
3333 = memory_address_addr_space_p (mem_mode, addr, as);
3335 if (has_postdec[mem_mode])
3336 data->ainc_costs[AINC_POST_DEC]
3337 = address_cost (addr, mem_mode, as, speed);
3339 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3340 || USE_STORE_PRE_INCREMENT (mem_mode))
3342 addr = gen_rtx_PRE_INC (address_mode, reg0);
3343 has_preinc[mem_mode]
3344 = memory_address_addr_space_p (mem_mode, addr, as);
3346 if (has_preinc[mem_mode])
3347 data->ainc_costs[AINC_PRE_INC]
3348 = address_cost (addr, mem_mode, as, speed);
3350 if (USE_LOAD_POST_INCREMENT (mem_mode)
3351 || USE_STORE_POST_INCREMENT (mem_mode))
3353 addr = gen_rtx_POST_INC (address_mode, reg0);
3354 has_postinc[mem_mode]
3355 = memory_address_addr_space_p (mem_mode, addr, as);
3357 if (has_postinc[mem_mode])
3358 data->ainc_costs[AINC_POST_INC]
3359 = address_cost (addr, mem_mode, as, speed);
3361 for (i = 0; i < 16; i++)
3363 sym_p = i & 1;
3364 var_p = (i >> 1) & 1;
3365 off_p = (i >> 2) & 1;
3366 rat_p = (i >> 3) & 1;
3368 addr = reg0;
3369 if (rat_p)
3370 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3371 gen_int_mode (rat, address_mode));
3373 if (var_p)
3374 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3376 if (sym_p)
3378 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3379 /* ??? We can run into trouble with some backends by presenting
3380 it with symbols which haven't been properly passed through
3381 targetm.encode_section_info. By setting the local bit, we
3382 enhance the probability of things working. */
3383 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3385 if (off_p)
3386 base = gen_rtx_fmt_e (CONST, address_mode,
3387 gen_rtx_fmt_ee
3388 (PLUS, address_mode, base,
3389 gen_int_mode (off, address_mode)));
3391 else if (off_p)
3392 base = gen_int_mode (off, address_mode);
3393 else
3394 base = NULL_RTX;
3396 if (base)
3397 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3399 start_sequence ();
3400 /* To avoid splitting addressing modes, pretend that no cse will
3401 follow. */
3402 old_cse_not_expected = cse_not_expected;
3403 cse_not_expected = true;
3404 addr = memory_address_addr_space (mem_mode, addr, as);
3405 cse_not_expected = old_cse_not_expected;
3406 seq = get_insns ();
3407 end_sequence ();
3409 acost = seq_cost (seq, speed);
3410 acost += address_cost (addr, mem_mode, as, speed);
3412 if (!acost)
3413 acost = 1;
3414 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3417 /* On some targets, it is quite expensive to load symbol to a register,
3418 which makes addresses that contain symbols look much more expensive.
3419 However, the symbol will have to be loaded in any case before the
3420 loop (and quite likely we have it in a register already), so it does not
3421 make much sense to penalize them too heavily. So make some final
3422 tweaks for the SYMBOL_PRESENT modes:
3424 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3425 var is cheaper, use this mode with small penalty.
3426 If VAR_PRESENT is true, try whether the mode with
3427 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3428 if this is the case, use it. */
3429 add_c = add_cost (speed, address_mode);
3430 for (i = 0; i < 8; i++)
3432 var_p = i & 1;
3433 off_p = (i >> 1) & 1;
3434 rat_p = (i >> 2) & 1;
3436 acost = data->costs[0][1][off_p][rat_p] + 1;
3437 if (var_p)
3438 acost += add_c;
3440 if (acost < data->costs[1][var_p][off_p][rat_p])
3441 data->costs[1][var_p][off_p][rat_p] = acost;
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3446 fprintf (dump_file, "Address costs:\n");
3448 for (i = 0; i < 16; i++)
3450 sym_p = i & 1;
3451 var_p = (i >> 1) & 1;
3452 off_p = (i >> 2) & 1;
3453 rat_p = (i >> 3) & 1;
3455 fprintf (dump_file, " ");
3456 if (sym_p)
3457 fprintf (dump_file, "sym + ");
3458 if (var_p)
3459 fprintf (dump_file, "var + ");
3460 if (off_p)
3461 fprintf (dump_file, "cst + ");
3462 if (rat_p)
3463 fprintf (dump_file, "rat * ");
3465 acost = data->costs[sym_p][var_p][off_p][rat_p];
3466 fprintf (dump_file, "index costs %d\n", acost);
3468 if (has_predec[mem_mode] || has_postdec[mem_mode]
3469 || has_preinc[mem_mode] || has_postinc[mem_mode])
3470 fprintf (dump_file, " May include autoinc/dec\n");
3471 fprintf (dump_file, "\n");
3474 address_cost_data_list[data_index] = data;
3477 bits = GET_MODE_BITSIZE (address_mode);
3478 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3479 offset &= mask;
3480 if ((offset >> (bits - 1) & 1))
3481 offset |= ~mask;
3482 s_offset = offset;
3484 autoinc = false;
3485 autoinc_type = AINC_NONE;
3486 msize = GET_MODE_SIZE (mem_mode);
3487 autoinc_offset = offset;
3488 if (stmt_after_inc)
3489 autoinc_offset += ratio * cstep;
3490 if (symbol_present || var_present || ratio != 1)
3491 autoinc = false;
3492 else
3494 if (has_postinc[mem_mode] && autoinc_offset == 0
3495 && msize == cstep)
3496 autoinc_type = AINC_POST_INC;
3497 else if (has_postdec[mem_mode] && autoinc_offset == 0
3498 && msize == -cstep)
3499 autoinc_type = AINC_POST_DEC;
3500 else if (has_preinc[mem_mode] && autoinc_offset == msize
3501 && msize == cstep)
3502 autoinc_type = AINC_PRE_INC;
3503 else if (has_predec[mem_mode] && autoinc_offset == -msize
3504 && msize == -cstep)
3505 autoinc_type = AINC_PRE_DEC;
3507 if (autoinc_type != AINC_NONE)
3508 autoinc = true;
3511 cost = 0;
3512 offset_p = (s_offset != 0
3513 && data->min_offset <= s_offset
3514 && s_offset <= data->max_offset);
3515 ratio_p = (ratio != 1
3516 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3518 if (ratio != 1 && !ratio_p)
3519 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3521 if (s_offset && !offset_p && !symbol_present)
3522 cost += add_cost (speed, address_mode);
3524 if (may_autoinc)
3525 *may_autoinc = autoinc;
3526 if (autoinc)
3527 acost = data->ainc_costs[autoinc_type];
3528 else
3529 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3530 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3531 return new_cost (cost + acost, complexity);
3534 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3535 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3536 calculating the operands of EXPR. Returns true if successful, and returns
3537 the cost in COST. */
3539 static bool
3540 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3541 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3543 comp_cost res;
3544 tree op1 = TREE_OPERAND (expr, 1);
3545 tree cst = TREE_OPERAND (mult, 1);
3546 tree multop = TREE_OPERAND (mult, 0);
3547 int m = exact_log2 (int_cst_value (cst));
3548 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3549 int sa_cost;
3550 bool equal_p = false;
3552 if (!(m >= 0 && m < maxm))
3553 return false;
3555 if (operand_equal_p (op1, mult, 0))
3556 equal_p = true;
3558 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3559 ? shiftadd_cost (speed, mode, m)
3560 : (equal_p
3561 ? shiftsub1_cost (speed, mode, m)
3562 : shiftsub0_cost (speed, mode, m)));
3563 res = new_cost (sa_cost, 0);
3564 res = add_costs (res, equal_p ? cost0 : cost1);
3566 STRIP_NOPS (multop);
3567 if (!is_gimple_val (multop))
3568 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3570 *cost = res;
3571 return true;
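/* Illustration: for EXPR a + b * 4, MULT is b * 4, m == 2, and the
   result is the shiftadd cost of (b << 2) + a; for a - b * 4 the
   corresponding shiftsub cost is used instead.  */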
3574 /* Estimates cost of forcing expression EXPR into a variable. */
3576 static comp_cost
3577 force_expr_to_var_cost (tree expr, bool speed)
3579 static bool costs_initialized = false;
3580 static unsigned integer_cost [2];
3581 static unsigned symbol_cost [2];
3582 static unsigned address_cost [2];
3583 tree op0, op1;
3584 comp_cost cost0, cost1, cost;
3585 enum machine_mode mode;
3587 if (!costs_initialized)
3589 tree type = build_pointer_type (integer_type_node);
3590 tree var, addr;
3591 rtx x;
3592 int i;
3594 var = create_tmp_var_raw (integer_type_node, "test_var");
3595 TREE_STATIC (var) = 1;
3596 x = produce_memory_decl_rtl (var, NULL);
3597 SET_DECL_RTL (var, x);
3599 addr = build1 (ADDR_EXPR, type, var);
3602 for (i = 0; i < 2; i++)
3604 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3605 2000), i);
3607 symbol_cost[i] = computation_cost (addr, i) + 1;
3609 address_cost[i]
3610 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3611 if (dump_file && (dump_flags & TDF_DETAILS))
3613 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3614 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3615 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3616 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3617 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3618 fprintf (dump_file, "\n");
3622 costs_initialized = true;
3625 STRIP_NOPS (expr);
3627 if (SSA_VAR_P (expr))
3628 return no_cost;
3630 if (is_gimple_min_invariant (expr))
3632 if (TREE_CODE (expr) == INTEGER_CST)
3633 return new_cost (integer_cost [speed], 0);
3635 if (TREE_CODE (expr) == ADDR_EXPR)
3637 tree obj = TREE_OPERAND (expr, 0);
3639 if (TREE_CODE (obj) == VAR_DECL
3640 || TREE_CODE (obj) == PARM_DECL
3641 || TREE_CODE (obj) == RESULT_DECL)
3642 return new_cost (symbol_cost [speed], 0);
3645 return new_cost (address_cost [speed], 0);
3648 switch (TREE_CODE (expr))
3650 case POINTER_PLUS_EXPR:
3651 case PLUS_EXPR:
3652 case MINUS_EXPR:
3653 case MULT_EXPR:
3654 op0 = TREE_OPERAND (expr, 0);
3655 op1 = TREE_OPERAND (expr, 1);
3656 STRIP_NOPS (op0);
3657 STRIP_NOPS (op1);
3658 break;
3660 CASE_CONVERT:
3661 case NEGATE_EXPR:
3662 op0 = TREE_OPERAND (expr, 0);
3663 STRIP_NOPS (op0);
3664 op1 = NULL_TREE;
3665 break;
3667 default:
3668 /* Just an arbitrary value, FIXME. */
3669 return new_cost (target_spill_cost[speed], 0);
3672 if (op0 == NULL_TREE
3673 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3674 cost0 = no_cost;
3675 else
3676 cost0 = force_expr_to_var_cost (op0, speed);
3678 if (op1 == NULL_TREE
3679 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3680 cost1 = no_cost;
3681 else
3682 cost1 = force_expr_to_var_cost (op1, speed);
3684 mode = TYPE_MODE (TREE_TYPE (expr));
3685 switch (TREE_CODE (expr))
3687 case POINTER_PLUS_EXPR:
3688 case PLUS_EXPR:
3689 case MINUS_EXPR:
3690 case NEGATE_EXPR:
3691 cost = new_cost (add_cost (speed, mode), 0);
3692 if (TREE_CODE (expr) != NEGATE_EXPR)
3694 tree mult = NULL_TREE;
3695 comp_cost sa_cost;
3696 if (TREE_CODE (op1) == MULT_EXPR)
3697 mult = op1;
3698 else if (TREE_CODE (op0) == MULT_EXPR)
3699 mult = op0;
3701 if (mult != NULL_TREE
3702 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3703 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3704 speed, &sa_cost))
3705 return sa_cost;
3707 break;
3709 CASE_CONVERT:
3711 tree inner_mode, outer_mode;
3712 outer_mode = TREE_TYPE (expr);
3713 inner_mode = TREE_TYPE (op0);
3714 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3715 TYPE_MODE (inner_mode), speed), 0);
3717 break;
3719 case MULT_EXPR:
3720 if (cst_and_fits_in_hwi (op0))
3721 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3722 mode, speed), 0);
3723 else if (cst_and_fits_in_hwi (op1))
3724 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3725 mode, speed), 0);
3726 else
3727 return new_cost (target_spill_cost [speed], 0);
3728 break;
3730 default:
3731 gcc_unreachable ();
3734 cost = add_costs (cost, cost0);
3735 cost = add_costs (cost, cost1);
3737 /* Bound the cost by target_spill_cost. The parts of complicated
3738 computations often are either loop invariant or at least can
3739 be shared between several iv uses, so letting this grow without
3740 limits would not give reasonable results. */
3741 if (cost.cost > (int) target_spill_cost [speed])
3742 cost.cost = target_spill_cost [speed];
3744 return cost;
3747 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3748 invariants the computation depends on. */
3750 static comp_cost
3751 force_var_cost (struct ivopts_data *data,
3752 tree expr, bitmap *depends_on)
3754 if (depends_on)
3756 fd_ivopts_data = data;
3757 walk_tree (&expr, find_depends, depends_on, NULL);
3760 return force_expr_to_var_cost (expr, data->speed);
3763 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3764 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3765 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3766 invariants the computation depends on. */
3768 static comp_cost
3769 split_address_cost (struct ivopts_data *data,
3770 tree addr, bool *symbol_present, bool *var_present,
3771 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3773 tree core;
3774 HOST_WIDE_INT bitsize;
3775 HOST_WIDE_INT bitpos;
3776 tree toffset;
3777 enum machine_mode mode;
3778 int unsignedp, volatilep;
3780 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3781 &unsignedp, &volatilep, false);
3783 if (toffset != 0
3784 || bitpos % BITS_PER_UNIT != 0
3785 || TREE_CODE (core) != VAR_DECL)
3787 *symbol_present = false;
3788 *var_present = true;
3789 fd_ivopts_data = data;
3790 walk_tree (&addr, find_depends, depends_on, NULL);
3791 return new_cost (target_spill_cost[data->speed], 0);
3794 *offset += bitpos / BITS_PER_UNIT;
3795 if (TREE_STATIC (core)
3796 || DECL_EXTERNAL (core))
3798 *symbol_present = true;
3799 *var_present = false;
3800 return no_cost;
3803 *symbol_present = false;
3804 *var_present = true;
3805 return no_cost;
3808 /* Estimates cost of expressing difference of addresses E1 - E2 as
3809 var + symbol + offset. The value of offset is added to OFFSET,
3810 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3811 part is missing. DEPENDS_ON is a set of the invariants the computation
3812 depends on. */
3814 static comp_cost
3815 ptr_difference_cost (struct ivopts_data *data,
3816 tree e1, tree e2, bool *symbol_present, bool *var_present,
3817 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3819 HOST_WIDE_INT diff = 0;
3820 aff_tree aff_e1, aff_e2;
3821 tree type;
3823 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3825 if (ptr_difference_const (e1, e2, &diff))
3827 *offset += diff;
3828 *symbol_present = false;
3829 *var_present = false;
3830 return no_cost;
3833 if (integer_zerop (e2))
3834 return split_address_cost (data, TREE_OPERAND (e1, 0),
3835 symbol_present, var_present, offset, depends_on);
3837 *symbol_present = false;
3838 *var_present = true;
3840 type = signed_type_for (TREE_TYPE (e1));
3841 tree_to_aff_combination (e1, type, &aff_e1);
3842 tree_to_aff_combination (e2, type, &aff_e2);
3843 aff_combination_scale (&aff_e2, double_int_minus_one);
3844 aff_combination_add (&aff_e1, &aff_e2);
3846 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3849 /* Estimates cost of expressing difference E1 - E2 as
3850 var + symbol + offset. The value of offset is added to OFFSET,
3851 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3852 part is missing. DEPENDS_ON is a set of the invariants the computation
3853 depends on. */
3855 static comp_cost
3856 difference_cost (struct ivopts_data *data,
3857 tree e1, tree e2, bool *symbol_present, bool *var_present,
3858 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3860 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3861 unsigned HOST_WIDE_INT off1, off2;
3862 aff_tree aff_e1, aff_e2;
3863 tree type;
3865 e1 = strip_offset (e1, &off1);
3866 e2 = strip_offset (e2, &off2);
3867 *offset += off1 - off2;
3869 STRIP_NOPS (e1);
3870 STRIP_NOPS (e2);
3872 if (TREE_CODE (e1) == ADDR_EXPR)
3873 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3874 offset, depends_on);
3875 *symbol_present = false;
3877 if (operand_equal_p (e1, e2, 0))
3879 *var_present = false;
3880 return no_cost;
3883 *var_present = true;
3885 if (integer_zerop (e2))
3886 return force_var_cost (data, e1, depends_on);
3888 if (integer_zerop (e1))
3890 comp_cost cost = force_var_cost (data, e2, depends_on);
3891 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3892 return cost;
3895 type = signed_type_for (TREE_TYPE (e1));
3896 tree_to_aff_combination (e1, type, &aff_e1);
3897 tree_to_aff_combination (e2, type, &aff_e2);
3898 aff_combination_scale (&aff_e2, double_int_minus_one);
3899 aff_combination_add (&aff_e1, &aff_e2);
3901 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3904 /* Returns true if AFF1 and AFF2 are identical. */
3906 static bool
3907 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3909 unsigned i;
3911 if (aff1->n != aff2->n)
3912 return false;
3914 for (i = 0; i < aff1->n; i++)
3916 if (aff1->elts[i].coef != aff2->elts[i].coef)
3917 return false;
3919 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3920 return false;
3922 return true;
3925 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3927 static int
3928 get_expr_id (struct ivopts_data *data, tree expr)
3930 struct iv_inv_expr_ent ent;
3931 struct iv_inv_expr_ent **slot;
3933 ent.expr = expr;
3934 ent.hash = iterative_hash_expr (expr, 0);
3935 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3936 if (*slot)
3937 return (*slot)->id;
3939 *slot = XNEW (struct iv_inv_expr_ent);
3940 (*slot)->expr = expr;
3941 (*slot)->hash = ent.hash;
3942 (*slot)->id = data->inv_expr_id++;
3943 return (*slot)->id;
3946 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3947 requires a new compiler-generated temporary. Returns -1 otherwise.
3948 ADDRESS_P is a flag indicating if the expression is for address
3949 computation. */
3951 static int
3952 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3953 tree cbase, HOST_WIDE_INT ratio,
3954 bool address_p)
3956 aff_tree ubase_aff, cbase_aff;
3957 tree expr, ub, cb;
3959 STRIP_NOPS (ubase);
3960 STRIP_NOPS (cbase);
3961 ub = ubase;
3962 cb = cbase;
3964 if ((TREE_CODE (ubase) == INTEGER_CST)
3965 && (TREE_CODE (cbase) == INTEGER_CST))
3966 return -1;
3968 /* Strips the constant part. */
3969 if (TREE_CODE (ubase) == PLUS_EXPR
3970 || TREE_CODE (ubase) == MINUS_EXPR
3971 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3973 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3974 ubase = TREE_OPERAND (ubase, 0);
3977 /* Strips the constant part. */
3978 if (TREE_CODE (cbase) == PLUS_EXPR
3979 || TREE_CODE (cbase) == MINUS_EXPR
3980 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3982 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3983 cbase = TREE_OPERAND (cbase, 0);
3986 if (address_p)
3988 if (((TREE_CODE (ubase) == SSA_NAME)
3989 || (TREE_CODE (ubase) == ADDR_EXPR
3990 && is_gimple_min_invariant (ubase)))
3991 && (TREE_CODE (cbase) == INTEGER_CST))
3992 return -1;
3994 if (((TREE_CODE (cbase) == SSA_NAME)
3995 || (TREE_CODE (cbase) == ADDR_EXPR
3996 && is_gimple_min_invariant (cbase)))
3997 && (TREE_CODE (ubase) == INTEGER_CST))
3998 return -1;
4001 if (ratio == 1)
4003 if (operand_equal_p (ubase, cbase, 0))
4004 return -1;
4006 if (TREE_CODE (ubase) == ADDR_EXPR
4007 && TREE_CODE (cbase) == ADDR_EXPR)
4009 tree usym, csym;
4011 usym = TREE_OPERAND (ubase, 0);
4012 csym = TREE_OPERAND (cbase, 0);
4013 if (TREE_CODE (usym) == ARRAY_REF)
4015 tree ind = TREE_OPERAND (usym, 1);
4016 if (TREE_CODE (ind) == INTEGER_CST
4017 && tree_fits_shwi_p (ind)
4018 && tree_to_shwi (ind) == 0)
4019 usym = TREE_OPERAND (usym, 0);
4021 if (TREE_CODE (csym) == ARRAY_REF)
4023 tree ind = TREE_OPERAND (csym, 1);
4024 if (TREE_CODE (ind) == INTEGER_CST
4025 && tree_fits_shwi_p (ind)
4026 && tree_to_shwi (ind) == 0)
4027 csym = TREE_OPERAND (csym, 0);
4029 if (operand_equal_p (usym, csym, 0))
4030 return -1;
4032 /* Now do a more complex comparison. */
4033 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4034 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4035 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4036 return -1;
4039 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4040 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4042 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4043 aff_combination_add (&ubase_aff, &cbase_aff);
4044 expr = aff_combination_to_tree (&ubase_aff);
4045 return get_expr_id (data, expr);
4050 /* Determines the cost of the computation by which USE is expressed
4051 from induction variable CAND. If ADDRESS_P is true, we just need
4052 to create an address from it, otherwise we want to get it into a
4053 register. A set of invariants we depend on is stored in
4054 DEPENDS_ON. AT is the statement at that the value is computed.
4055 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4056 addressing is likely. */
4058 static comp_cost
4059 get_computation_cost_at (struct ivopts_data *data,
4060 struct iv_use *use, struct iv_cand *cand,
4061 bool address_p, bitmap *depends_on, gimple at,
4062 bool *can_autoinc,
4063 int *inv_expr_id)
4065 tree ubase = use->iv->base, ustep = use->iv->step;
4066 tree cbase, cstep;
4067 tree utype = TREE_TYPE (ubase), ctype;
4068 unsigned HOST_WIDE_INT cstepi, offset = 0;
4069 HOST_WIDE_INT ratio, aratio;
4070 bool var_present, symbol_present, stmt_is_after_inc;
4071 comp_cost cost;
4072 double_int rat;
4073 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4074 enum machine_mode mem_mode = (address_p
4075 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4076 : VOIDmode);
4078 *depends_on = NULL;
4080 /* Only consider real candidates. */
4081 if (!cand->iv)
4082 return infinite_cost;
4084 cbase = cand->iv->base;
4085 cstep = cand->iv->step;
4086 ctype = TREE_TYPE (cbase);
4088 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4090 /* We do not have enough precision to express the values of the use. */
4091 return infinite_cost;
4094 if (address_p
4095 || (use->iv->base_object
4096 && cand->iv->base_object
4097 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4098 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4100 /* Do not try to express address of an object with computation based
4101 on address of a different object. This may cause problems in rtl
4102 level alias analysis (that does not expect this to be happening,
4103 as this is illegal in C), and would be unlikely to be useful
4104 anyway. */
4105 if (use->iv->base_object
4106 && cand->iv->base_object
4107 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4108 return infinite_cost;
4111 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4113 /* TODO -- add direct handling of this case. */
4114 goto fallback;
4117 /* CSTEPI is removed from the offset in case the statement is after the
4118 increment. If the step is not constant, we use zero instead.
4119 This is a bit imprecise (there is the extra addition), but
4120 redundancy elimination is likely to transform the code so that
4121 it uses the value of the variable before the increment anyway,
4122 so this is not too unrealistic. */
4123 if (cst_and_fits_in_hwi (cstep))
4124 cstepi = int_cst_value (cstep);
4125 else
4126 cstepi = 0;
4128 if (!constant_multiple_of (ustep, cstep, &rat))
4129 return infinite_cost;
4131 if (rat.fits_shwi ())
4132 ratio = rat.to_shwi ();
4133 else
4134 return infinite_cost;
4136 STRIP_NOPS (cbase);
4137 ctype = TREE_TYPE (cbase);
4139 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4141 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4142 or ratio == 1, it is better to handle this like
4144 ubase - ratio * cbase + ratio * var
4146 (this also holds in the case ratio == -1, TODO). */
4148 if (cst_and_fits_in_hwi (cbase))
4150 offset = - ratio * int_cst_value (cbase);
4151 cost = difference_cost (data,
4152 ubase, build_int_cst (utype, 0),
4153 &symbol_present, &var_present, &offset,
4154 depends_on);
4155 cost.cost /= avg_loop_niter (data->current_loop);
4157 else if (ratio == 1)
4159 tree real_cbase = cbase;
4161 /* Check to see if any adjustment is needed. */
4162 if (cstepi == 0 && stmt_is_after_inc)
4164 aff_tree real_cbase_aff;
4165 aff_tree cstep_aff;
4167 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4168 &real_cbase_aff);
4169 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4171 aff_combination_add (&real_cbase_aff, &cstep_aff);
4172 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4175 cost = difference_cost (data,
4176 ubase, real_cbase,
4177 &symbol_present, &var_present, &offset,
4178 depends_on);
4179 cost.cost /= avg_loop_niter (data->current_loop);
4181 else if (address_p
4182 && !POINTER_TYPE_P (ctype)
4183 && multiplier_allowed_in_address_p
4184 (ratio, mem_mode,
4185 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4187 cbase
4188 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4189 cost = difference_cost (data,
4190 ubase, cbase,
4191 &symbol_present, &var_present, &offset,
4192 depends_on);
4193 cost.cost /= avg_loop_niter (data->current_loop);
4195 else
4197 cost = force_var_cost (data, cbase, depends_on);
4198 cost = add_costs (cost,
4199 difference_cost (data,
4200 ubase, build_int_cst (utype, 0),
4201 &symbol_present, &var_present,
4202 &offset, depends_on));
4203 cost.cost /= avg_loop_niter (data->current_loop);
4204 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4207 if (inv_expr_id)
4209 *inv_expr_id =
4210 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4211 /* Clear depends on. */
4212 if (*inv_expr_id != -1 && depends_on && *depends_on)
4213 bitmap_clear (*depends_on);
4216 /* If we are after the increment, the value of the candidate is higher by
4217 one iteration. */
4218 if (stmt_is_after_inc)
4219 offset -= ratio * cstepi;
4221 /* Now the computation has the shape symbol + var1 + const + ratio * var2
4222 (the symbol/var1/const parts may be omitted). If we are looking for an
4223 address, find the cost of addressing this. */
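/* An illustrative instance (made-up values): for an address use
   &a[i + 3] with 4-byte elements (use step 4) based on the plain
   counter i (candidate step 1), we get symbol == &a, offset == 12,
   ratio == 4 and var2 == i, and get_address_cost below queries the
   target for the cost of an addressing mode of that shape.  */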
4224 if (address_p)
4225 return add_costs (cost,
4226 get_address_cost (symbol_present, var_present,
4227 offset, ratio, cstepi,
4228 mem_mode,
4229 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4230 speed, stmt_is_after_inc,
4231 can_autoinc));
4233 /* Otherwise estimate the costs for computing the expression. */
4234 if (!symbol_present && !var_present && !offset)
4236 if (ratio != 1)
4237 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4238 return cost;
4241 /* Symbol + offset should be compile-time computable, so consider that they
4242 are added once to the variable, if present. */
4243 if (var_present && (symbol_present || offset))
4244 cost.cost += adjust_setup_cost (data,
4245 add_cost (speed, TYPE_MODE (ctype)));
4247 /* Having offset does not affect runtime cost in case it is added to
4248 symbol, but it increases complexity. */
4249 if (offset)
4250 cost.complexity++;
4252 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4254 aratio = ratio > 0 ? ratio : -ratio;
4255 if (aratio != 1)
4256 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4257 return cost;
4259 fallback:
4260 if (can_autoinc)
4261 *can_autoinc = false;
4264 /* Just get the expression, expand it and measure the cost. */
4265 tree comp = get_computation_at (data->current_loop, use, cand, at);
4267 if (!comp)
4268 return infinite_cost;
4270 if (address_p)
4271 comp = build_simple_mem_ref (comp);
4273 return new_cost (computation_cost (comp, speed), 0);
4277 /* Determines the cost of the computation by which USE is expressed
4278 from induction variable CAND. If ADDRESS_P is true, we just need
4279 to create an address from it, otherwise we want to get it into a
4280 register. A set of invariants we depend on is stored in
4281 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4282 autoinc addressing is likely. */
4284 static comp_cost
4285 get_computation_cost (struct ivopts_data *data,
4286 struct iv_use *use, struct iv_cand *cand,
4287 bool address_p, bitmap *depends_on,
4288 bool *can_autoinc, int *inv_expr_id)
4290 return get_computation_cost_at (data,
4291 use, cand, address_p, depends_on, use->stmt,
4292 can_autoinc, inv_expr_id);
4295 /* Determines cost of basing replacement of USE on CAND in a generic
4296 expression. */
4298 static bool
4299 determine_use_iv_cost_generic (struct ivopts_data *data,
4300 struct iv_use *use, struct iv_cand *cand)
4302 bitmap depends_on;
4303 comp_cost cost;
4304 int inv_expr_id = -1;
4306 /* The simple case first -- if we need to express value of the preserved
4307 original biv, the cost is 0. This also prevents us from counting the
4308 cost of increment twice -- once at this use and once in the cost of
4309 the candidate. */
4310 if (cand->pos == IP_ORIGINAL
4311 && cand->incremented_at == use->stmt)
4313 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4314 ERROR_MARK, -1);
4315 return true;
4318 cost = get_computation_cost (data, use, cand, false, &depends_on,
4319 NULL, &inv_expr_id);
4321 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4322 inv_expr_id);
4324 return !infinite_cost_p (cost);
4327 /* Determines cost of basing replacement of USE on CAND in an address. */
4329 static bool
4330 determine_use_iv_cost_address (struct ivopts_data *data,
4331 struct iv_use *use, struct iv_cand *cand)
4333 bitmap depends_on;
4334 bool can_autoinc;
4335 int inv_expr_id = -1;
4336 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4337 &can_autoinc, &inv_expr_id);
4339 if (cand->ainc_use == use)
4341 if (can_autoinc)
4342 cost.cost -= cand->cost_step;
4343 /* If we generated the candidate solely for exploiting autoincrement
4344 opportunities, and it turns out it can't be used, set the cost to
4345 infinity to make sure we ignore it. */
4346 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4347 cost = infinite_cost;
4349 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4350 inv_expr_id);
4352 return !infinite_cost_p (cost);
4355 /* Computes value of candidate CAND at position AT in iteration NITER, and
4356 stores it to VAL. */
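/* In other words, VAL = BASE + NITER * STEP, plus one extra STEP when
   AT is after the increment of CAND; e.g. for base 16, step 4 and
   NITER == n this yields 16 + 4 * n (illustrative numbers).  The
   arithmetic is carried out in an unsigned type so that it cannot
   overflow.  */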
4358 static void
4359 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4360 aff_tree *val)
4362 aff_tree step, delta, nit;
4363 struct iv *iv = cand->iv;
4364 tree type = TREE_TYPE (iv->base);
4365 /* Compute in the unsigned variant of TYPE (which also handles pointer
4366 bases), so that the arithmetic below cannot overflow. */
4368 tree steptype = unsigned_type_for (type);
4370 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4371 aff_combination_convert (&step, steptype);
4372 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4373 aff_combination_convert (&nit, steptype);
4374 aff_combination_mult (&nit, &step, &delta);
4375 if (stmt_after_increment (loop, cand, at))
4376 aff_combination_add (&delta, &step);
4378 tree_to_aff_combination (iv->base, type, val);
4379 if (!POINTER_TYPE_P (type))
4380 aff_combination_convert (val, steptype);
4381 aff_combination_add (val, &delta);
4384 /* Returns the period of induction variable IV. */
4386 static tree
4387 iv_period (struct iv *iv)
4389 tree step = iv->step, period, type;
4390 tree pow2div;
4392 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4394 type = unsigned_type_for (TREE_TYPE (step));
4395 /* The period of the iv is lcm (step, type_range) / step - 1,
4396 i.e., N * type_range / step - 1. Since the type range is a power
4397 of two, N == step >> num_of_ending_zeros_binary (step),
4398 so the final result is
4400 (type_range >> num_of_ending_zeros_binary (step)) - 1
4402 */
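/* A worked example (illustrative numbers): for a 16-bit unsigned type
   and step == 12, num_of_ending_zeros_binary (12) == 2, so the period
   is (65536 >> 2) - 1 == 16383; the values base, base + 12, ... stay
   pairwise distinct for that many increments before the iv wraps back
   to a value it already had.  */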
4403 pow2div = num_ending_zeros (step);
4405 period = build_low_bits_mask (type,
4406 (TYPE_PRECISION (type)
4407 - tree_to_uhwi (pow2div)));
4409 return period;
4412 /* Returns the comparison operator used when eliminating the iv USE. */
4414 static enum tree_code
4415 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4417 struct loop *loop = data->current_loop;
4418 basic_block ex_bb;
4419 edge exit;
4421 ex_bb = gimple_bb (use->stmt);
4422 exit = EDGE_SUCC (ex_bb, 0);
4423 if (flow_bb_inside_loop_p (loop, exit->dest))
4424 exit = EDGE_SUCC (ex_bb, 1);
4426 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
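/* For iv_elimination_compare above, a sketch: in a do-while loop
   ending in "if (i != n) goto loop;" the exit is the false edge of the
   condition, so NE_EXPR is returned and the rewritten test keeps
   looping while cand != bound; had the exit been the true edge,
   EQ_EXPR would make the loop exit as soon as cand == bound.  */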
4429 static tree
4430 strip_wrap_conserving_type_conversions (tree exp)
4432 while (tree_ssa_useless_type_conversion (exp)
4433 && (nowrap_type_p (TREE_TYPE (exp))
4434 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4435 exp = TREE_OPERAND (exp, 0);
4436 return exp;
4439 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4440 check for an exact match. */
4442 static bool
4443 expr_equal_p (tree e, tree what)
4445 gimple stmt;
4446 enum tree_code code;
4448 e = strip_wrap_conserving_type_conversions (e);
4449 what = strip_wrap_conserving_type_conversions (what);
4451 code = TREE_CODE (what);
4452 if (TREE_TYPE (e) != TREE_TYPE (what))
4453 return false;
4455 if (operand_equal_p (e, what, 0))
4456 return true;
4458 if (TREE_CODE (e) != SSA_NAME)
4459 return false;
4461 stmt = SSA_NAME_DEF_STMT (e);
4462 if (gimple_code (stmt) != GIMPLE_ASSIGN
4463 || gimple_assign_rhs_code (stmt) != code)
4464 return false;
4466 switch (get_gimple_rhs_class (code))
4468 case GIMPLE_BINARY_RHS:
4469 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4470 return false;
4471 /* Fallthru. */
4473 case GIMPLE_UNARY_RHS:
4474 case GIMPLE_SINGLE_RHS:
4475 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4476 default:
4477 return false;
4481 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4482 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4483 calculation is performed in non-wrapping type.
4485 TODO: More generally, we could test for the situation that
4486 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4487 This would require knowing the sign of OFFSET.
4489 Also, we only look for the first addition in the computation of BASE.
4490 More complex analysis would be better, but introducing it just for
4491 this optimization seems like an overkill. */
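/* E.g. (a sketch): if BASE is defined by "base = p + n" performed in a
   non-wrapping pointer type and OFFSET equals n, then BASE - OFFSET is
   just p, so the subtraction provably cannot overflow; this is exactly
   the POINTER_PLUS_EXPR case recognized below.  */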
4493 static bool
4494 difference_cannot_overflow_p (tree base, tree offset)
4496 enum tree_code code;
4497 tree e1, e2;
4499 if (!nowrap_type_p (TREE_TYPE (base)))
4500 return false;
4502 base = expand_simple_operations (base);
4504 if (TREE_CODE (base) == SSA_NAME)
4506 gimple stmt = SSA_NAME_DEF_STMT (base);
4508 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4509 return false;
4511 code = gimple_assign_rhs_code (stmt);
4512 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4513 return false;
4515 e1 = gimple_assign_rhs1 (stmt);
4516 e2 = gimple_assign_rhs2 (stmt);
4518 else
4520 code = TREE_CODE (base);
4521 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4522 return false;
4523 e1 = TREE_OPERAND (base, 0);
4524 e2 = TREE_OPERAND (base, 1);
4527 /* TODO: deeper inspection may be necessary to prove the equality. */
4528 switch (code)
4530 case PLUS_EXPR:
4531 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4532 case POINTER_PLUS_EXPR:
4533 return expr_equal_p (e2, offset);
4535 default:
4536 return false;
4540 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4541 comparison with CAND. NITER describes the number of iterations of
4542 the loop. If successful, the comparison in COMP_P is altered accordingly.
4544 We aim to handle the following situation:
4546 sometype *base, *p;
4547 int a, b, i;
4549 i = a;
4550 p = p_0 = base + a;
4552 do
4553 {
4554 bla (*p);
4555 p++;
4556 i++;
4557 }
4558 while (i < b);
4560 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4561 We aim to optimize this to
4563 p = p_0 = base + a;
4564 do
4565 {
4566 bla (*p);
4567 p++;
4568 }
4569 while (p < p_0 - a + b);
4571 This preserves correctness, since the pointer arithmetic does not
4572 overflow. More precisely:
4574 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4575 overflow in computing it or the values of p.
4576 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4577 overflow. To prove this, we use the fact that p_0 = base + a. */
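/* A worked instance (illustrative numbers): with a == 2 and b == 5 the
   body runs three times, NITER == b - a - 1 == 2, and the new bound is
   p_0 - a + b == p_0 + 3, which p reaches exactly when the original
   test i < b would have failed.  With a == 7 and b == 5 the body still
   runs once (it is a do-while) and NITER == 0; case 2) above is what
   guarantees that forming p_0 - a in the bound is safe.  */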
4579 static bool
4580 iv_elimination_compare_lt (struct ivopts_data *data,
4581 struct iv_cand *cand, enum tree_code *comp_p,
4582 struct tree_niter_desc *niter)
4584 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4585 struct aff_tree nit, tmpa, tmpb;
4586 enum tree_code comp;
4587 HOST_WIDE_INT step;
4589 /* We need to know that the candidate induction variable does not overflow.
4590 While more complex analysis may be used to prove this, for now just
4591 check that the variable appears in the original program and that it
4592 is computed in a type that guarantees no overflows. */
4593 cand_type = TREE_TYPE (cand->iv->base);
4594 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4595 return false;
4597 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4598 the calculation of the BOUND could overflow, making the comparison
4599 invalid. */
4600 if (!data->loop_single_exit_p)
4601 return false;
4603 /* We need to be able to decide whether candidate is increasing or decreasing
4604 in order to choose the right comparison operator. */
4605 if (!cst_and_fits_in_hwi (cand->iv->step))
4606 return false;
4607 step = int_cst_value (cand->iv->step);
4609 /* Check that the number of iterations matches the expected pattern:
4610 a + 1 > b ? 0 : b - a - 1. */
4611 mbz = niter->may_be_zero;
4612 if (TREE_CODE (mbz) == GT_EXPR)
4614 /* Handle a + 1 > b. */
4615 tree op0 = TREE_OPERAND (mbz, 0);
4616 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4618 a = TREE_OPERAND (op0, 0);
4619 b = TREE_OPERAND (mbz, 1);
4621 else
4622 return false;
4624 else if (TREE_CODE (mbz) == LT_EXPR)
4626 tree op1 = TREE_OPERAND (mbz, 1);
4628 /* Handle b < a + 1. */
4629 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4631 a = TREE_OPERAND (op1, 0);
4632 b = TREE_OPERAND (mbz, 0);
4634 else
4635 return false;
4637 else
4638 return false;
4640 /* Expected number of iterations is B - A - 1. Check that it matches
4641 the actual number, i.e., that B - A - NITER = 1. */
4642 tree_to_aff_combination (niter->niter, nit_type, &nit);
4643 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4644 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4645 aff_combination_scale (&nit, double_int_minus_one);
4646 aff_combination_scale (&tmpa, double_int_minus_one);
4647 aff_combination_add (&tmpb, &tmpa);
4648 aff_combination_add (&tmpb, &nit);
4649 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4650 return false;
4652 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4653 overflow. */
4654 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4655 cand->iv->step,
4656 fold_convert (TREE_TYPE (cand->iv->step), a));
4657 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4658 return false;
4660 /* Determine the new comparison operator. */
4661 comp = step < 0 ? GT_EXPR : LT_EXPR;
4662 if (*comp_p == NE_EXPR)
4663 *comp_p = comp;
4664 else if (*comp_p == EQ_EXPR)
4665 *comp_p = invert_tree_comparison (comp, false);
4666 else
4667 gcc_unreachable ();
4669 return true;
4672 /* Check whether it is possible to express the condition in USE by comparison
4673 of candidate CAND. If so, store the value compared with to BOUND, and the
4674 comparison operator to COMP. */
4676 static bool
4677 may_eliminate_iv (struct ivopts_data *data,
4678 struct iv_use *use, struct iv_cand *cand, tree *bound,
4679 enum tree_code *comp)
4681 basic_block ex_bb;
4682 edge exit;
4683 tree period;
4684 struct loop *loop = data->current_loop;
4685 aff_tree bnd;
4686 struct tree_niter_desc *desc = NULL;
4688 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4689 return false;
4691 /* For now, this works only for exits that dominate the loop latch.
4692 TODO: extend to other conditions inside loop body. */
4693 ex_bb = gimple_bb (use->stmt);
4694 if (use->stmt != last_stmt (ex_bb)
4695 || gimple_code (use->stmt) != GIMPLE_COND
4696 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4697 return false;
4699 exit = EDGE_SUCC (ex_bb, 0);
4700 if (flow_bb_inside_loop_p (loop, exit->dest))
4701 exit = EDGE_SUCC (ex_bb, 1);
4702 if (flow_bb_inside_loop_p (loop, exit->dest))
4703 return false;
4705 desc = niter_for_exit (data, exit);
4706 if (!desc)
4707 return false;
4709 /* Determine whether we can use the variable to test the exit condition.
4710 This is the case iff the period of the induction variable is greater
4711 than the number of iterations for which the exit condition is true. */
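/* E.g. (a sketch): an 8-bit unsigned candidate with step 1 has period
   255, so it can faithfully replace the exit test only when the loop
   is known to exit within 255 iterations; otherwise the candidate
   could wrap around and make the rewritten comparison give the wrong
   answer.  */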
4712 period = iv_period (cand->iv);
4714 /* If the number of iterations is constant, compare against it directly. */
4715 if (TREE_CODE (desc->niter) == INTEGER_CST)
4717 /* See cand_value_at. */
4718 if (stmt_after_increment (loop, cand, use->stmt))
4720 if (!tree_int_cst_lt (desc->niter, period))
4721 return false;
4723 else
4725 if (tree_int_cst_lt (period, desc->niter))
4726 return false;
4730 /* If not, and if this is the only possible exit of the loop, see whether
4731 we can get a conservative estimate on the number of iterations of the
4732 entire loop and compare against that instead. */
4733 else
4735 double_int period_value, max_niter;
4737 max_niter = desc->max;
4738 if (stmt_after_increment (loop, cand, use->stmt))
4739 max_niter += double_int_one;
4740 period_value = tree_to_double_int (period);
4741 if (max_niter.ugt (period_value))
4743 /* See if we can take advantage of inferred loop bound information. */
4744 if (data->loop_single_exit_p)
4746 if (!max_loop_iterations (loop, &max_niter))
4747 return false;
4748 /* The loop bound is already adjusted by adding 1. */
4749 if (max_niter.ugt (period_value))
4750 return false;
4752 else
4753 return false;
4757 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4759 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4760 aff_combination_to_tree (&bnd));
4761 *comp = iv_elimination_compare (data, use);
4763 /* It is unlikely that computing the number of iterations using division
4764 would be more profitable than keeping the original induction variable. */
4765 if (expression_expensive_p (*bound))
4766 return false;
4768 /* Sometimes, when the number of iterations may be zero unless additional
4769 assumptions hold, it is possible to handle the situation by using <
4770 instead of != in the exit condition.
4772 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4773 base the exit condition on it. However, that is often too
4774 expensive. */
4775 if (!integer_zerop (desc->may_be_zero))
4776 return iv_elimination_compare_lt (data, cand, comp, desc);
4778 return true;
4781 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4782 be copied, if it is used in the loop body and DATA->body_includes_call. */
4784 static int
4785 parm_decl_cost (struct ivopts_data *data, tree bound)
4787 tree sbound = bound;
4788 STRIP_NOPS (sbound);
4790 if (TREE_CODE (sbound) == SSA_NAME
4791 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4792 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4793 && data->body_includes_call)
4794 return COSTS_N_INSNS (1);
4796 return 0;
4799 /* Determines cost of basing replacement of USE on CAND in a condition. */
4801 static bool
4802 determine_use_iv_cost_condition (struct ivopts_data *data,
4803 struct iv_use *use, struct iv_cand *cand)
4805 tree bound = NULL_TREE;
4806 struct iv *cmp_iv;
4807 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4808 comp_cost elim_cost, express_cost, cost, bound_cost;
4809 bool ok;
4810 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4811 tree *control_var, *bound_cst;
4812 enum tree_code comp = ERROR_MARK;
4814 /* Only consider real candidates. */
4815 if (!cand->iv)
4817 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4818 ERROR_MARK, -1);
4819 return false;
4822 /* Try iv elimination. */
4823 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4825 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4826 if (elim_cost.cost == 0)
4827 elim_cost.cost = parm_decl_cost (data, bound);
4828 else if (TREE_CODE (bound) == INTEGER_CST)
4829 elim_cost.cost = 0;
4830 /* If we replace a loop condition 'i < n' with 'p < base + n',
4831 depends_on_elim will have 'base' and 'n' set, which implies
4832 that both 'base' and 'n' will be live during the loop. More likely,
4833 'base + n' will be loop invariant, resulting in only one live value
4834 during the loop. So in that case we clear depends_on_elim and set
4835 elim_inv_expr_id instead. */
4836 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4838 elim_inv_expr_id = get_expr_id (data, bound);
4839 bitmap_clear (depends_on_elim);
4841 /* The bound is a loop invariant, so it will be computed
4842 only once. */
4843 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4845 else
4846 elim_cost = infinite_cost;
4848 /* Try expressing the original giv. If it is compared with an invariant,
4849 note that we cannot get rid of it. */
4850 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4851 NULL, &cmp_iv);
4852 gcc_assert (ok);
4854 /* When the condition is a comparison of the candidate IV against
4855 zero, prefer this IV.
4857 TODO: The constant that we're subtracting from the cost should
4858 be target-dependent. This information should be added to the
4859 target costs for each backend. */
4860 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4861 && integer_zerop (*bound_cst)
4862 && (operand_equal_p (*control_var, cand->var_after, 0)
4863 || operand_equal_p (*control_var, cand->var_before, 0)))
4864 elim_cost.cost -= 1;
4866 express_cost = get_computation_cost (data, use, cand, false,
4867 &depends_on_express, NULL,
4868 &express_inv_expr_id);
4869 fd_ivopts_data = data;
4870 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4872 /* Count the cost of the original bound as well. */
4873 bound_cost = force_var_cost (data, *bound_cst, NULL);
4874 if (bound_cost.cost == 0)
4875 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4876 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4877 bound_cost.cost = 0;
4878 express_cost.cost += bound_cost.cost;
4880 /* Choose the better approach, preferring the eliminated IV. */
4881 if (compare_costs (elim_cost, express_cost) <= 0)
4883 cost = elim_cost;
4884 depends_on = depends_on_elim;
4885 depends_on_elim = NULL;
4886 inv_expr_id = elim_inv_expr_id;
4888 else
4890 cost = express_cost;
4891 depends_on = depends_on_express;
4892 depends_on_express = NULL;
4893 bound = NULL_TREE;
4894 comp = ERROR_MARK;
4895 inv_expr_id = express_inv_expr_id;
4898 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4900 if (depends_on_elim)
4901 BITMAP_FREE (depends_on_elim);
4902 if (depends_on_express)
4903 BITMAP_FREE (depends_on_express);
4905 return !infinite_cost_p (cost);
4908 /* Determines cost of basing replacement of USE on CAND. Returns false
4909 if USE cannot be based on CAND. */
4911 static bool
4912 determine_use_iv_cost (struct ivopts_data *data,
4913 struct iv_use *use, struct iv_cand *cand)
4915 switch (use->type)
4917 case USE_NONLINEAR_EXPR:
4918 return determine_use_iv_cost_generic (data, use, cand);
4920 case USE_ADDRESS:
4921 return determine_use_iv_cost_address (data, use, cand);
4923 case USE_COMPARE:
4924 return determine_use_iv_cost_condition (data, use, cand);
4926 default:
4927 gcc_unreachable ();
4931 /* Return true if get_computation_cost indicates that autoincrement is
4932 a possibility for the pair of USE and CAND, false otherwise. */
4934 static bool
4935 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4936 struct iv_cand *cand)
4938 bitmap depends_on;
4939 bool can_autoinc;
4940 comp_cost cost;
4942 if (use->type != USE_ADDRESS)
4943 return false;
4945 cost = get_computation_cost (data, use, cand, true, &depends_on,
4946 &can_autoinc, NULL);
4948 BITMAP_FREE (depends_on);
4950 return !infinite_cost_p (cost) && can_autoinc;
4953 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4954 use that allows autoincrement, and set their AINC_USE if possible. */
4956 static void
4957 set_autoinc_for_original_candidates (struct ivopts_data *data)
4959 unsigned i, j;
4961 for (i = 0; i < n_iv_cands (data); i++)
4963 struct iv_cand *cand = iv_cand (data, i);
4964 struct iv_use *closest_before = NULL;
4965 struct iv_use *closest_after = NULL;
4966 if (cand->pos != IP_ORIGINAL)
4967 continue;
4969 for (j = 0; j < n_iv_uses (data); j++)
4971 struct iv_use *use = iv_use (data, j);
4972 unsigned uid = gimple_uid (use->stmt);
4974 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4975 continue;
4977 if (uid < gimple_uid (cand->incremented_at)
4978 && (closest_before == NULL
4979 || uid > gimple_uid (closest_before->stmt)))
4980 closest_before = use;
4982 if (uid > gimple_uid (cand->incremented_at)
4983 && (closest_after == NULL
4984 || uid < gimple_uid (closest_after->stmt)))
4985 closest_after = use;
4988 if (closest_before != NULL
4989 && autoinc_possible_for_pair (data, closest_before, cand))
4990 cand->ainc_use = closest_before;
4991 else if (closest_after != NULL
4992 && autoinc_possible_for_pair (data, closest_after, cand))
4993 cand->ainc_use = closest_after;
4997 /* Finds the candidates for the induction variables. */
4999 static void
5000 find_iv_candidates (struct ivopts_data *data)
5002 /* Add commonly used ivs. */
5003 add_standard_iv_candidates (data);
5005 /* Add old induction variables. */
5006 add_old_ivs_candidates (data);
5008 /* Add induction variables derived from uses. */
5009 add_derived_ivs_candidates (data);
5011 set_autoinc_for_original_candidates (data);
5013 /* Record the important candidates. */
5014 record_important_candidates (data);
5017 /* Determines costs of basing the use of the iv on an iv candidate. */
5019 static void
5020 determine_use_iv_costs (struct ivopts_data *data)
5022 unsigned i, j;
5023 struct iv_use *use;
5024 struct iv_cand *cand;
5025 bitmap to_clear = BITMAP_ALLOC (NULL);
5027 alloc_use_cost_map (data);
5029 for (i = 0; i < n_iv_uses (data); i++)
5031 use = iv_use (data, i);
5033 if (data->consider_all_candidates)
5035 for (j = 0; j < n_iv_cands (data); j++)
5037 cand = iv_cand (data, j);
5038 determine_use_iv_cost (data, use, cand);
5041 else
5043 bitmap_iterator bi;
5045 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5047 cand = iv_cand (data, j);
5048 if (!determine_use_iv_cost (data, use, cand))
5049 bitmap_set_bit (to_clear, j);
5052 /* Remove the candidates for which the cost is infinite from
5053 the list of related candidates. */
5054 bitmap_and_compl_into (use->related_cands, to_clear);
5055 bitmap_clear (to_clear);
5059 BITMAP_FREE (to_clear);
5061 if (dump_file && (dump_flags & TDF_DETAILS))
5063 fprintf (dump_file, "Use-candidate costs:\n");
5065 for (i = 0; i < n_iv_uses (data); i++)
5067 use = iv_use (data, i);
5069 fprintf (dump_file, "Use %d:\n", i);
5070 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5071 for (j = 0; j < use->n_map_members; j++)
5073 if (!use->cost_map[j].cand
5074 || infinite_cost_p (use->cost_map[j].cost))
5075 continue;
5077 fprintf (dump_file, " %d\t%d\t%d\t",
5078 use->cost_map[j].cand->id,
5079 use->cost_map[j].cost.cost,
5080 use->cost_map[j].cost.complexity);
5081 if (use->cost_map[j].depends_on)
5082 bitmap_print (dump_file,
5083 use->cost_map[j].depends_on, "","");
5084 if (use->cost_map[j].inv_expr_id != -1)
5085 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5086 fprintf (dump_file, "\n");
5089 fprintf (dump_file, "\n");
5091 fprintf (dump_file, "\n");
5095 /* Determines cost of the candidate CAND. */
5097 static void
5098 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5100 comp_cost cost_base;
5101 unsigned cost, cost_step;
5102 tree base;
5104 if (!cand->iv)
5106 cand->cost = 0;
5107 return;
5110 /* There are two costs associated with the candidate -- its increment
5111 and its initialization. The second is almost negligible for any loop
5112 that rolls enough, so we give it only very little weight. */
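/* E.g. (a sketch): an increment costing one addition is paid on every
   iteration, while the initialization is a one-off cost that
   adjust_setup_cost spreads over the average number of iterations, so
   for a loop that rolls often cand->cost is dominated by the increment
   part.  */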
5114 base = cand->iv->base;
5115 cost_base = force_var_cost (data, base, NULL);
5116 /* It will be exceptional that the iv register happens to be initialized with
5117 the proper value at no cost. In general, there will at least be a regcopy
5118 or a const set. */
5119 if (cost_base.cost == 0)
5120 cost_base.cost = COSTS_N_INSNS (1);
5121 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5123 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5125 /* Prefer the original ivs unless we may gain something by replacing them.
5126 The reason is to make debugging simpler, so this is not relevant for
5127 artificial ivs created by other optimization passes. */
5128 if (cand->pos != IP_ORIGINAL
5129 || !SSA_NAME_VAR (cand->var_before)
5130 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5131 cost++;
5133 /* Prefer not to insert statements into latch unless there are some
5134 already (so that we do not create unnecessary jumps). */
5135 if (cand->pos == IP_END
5136 && empty_block_p (ip_end_pos (data->current_loop)))
5137 cost++;
5139 cand->cost = cost;
5140 cand->cost_step = cost_step;
5143 /* Determines costs of computation of the candidates. */
5145 static void
5146 determine_iv_costs (struct ivopts_data *data)
5148 unsigned i;
5150 if (dump_file && (dump_flags & TDF_DETAILS))
5152 fprintf (dump_file, "Candidate costs:\n");
5153 fprintf (dump_file, " cand\tcost\n");
5156 for (i = 0; i < n_iv_cands (data); i++)
5158 struct iv_cand *cand = iv_cand (data, i);
5160 determine_iv_cost (data, cand);
5162 if (dump_file && (dump_flags & TDF_DETAILS))
5163 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5166 if (dump_file && (dump_flags & TDF_DETAILS))
5167 fprintf (dump_file, "\n");
5170 /* Calculates cost for having SIZE induction variables. */
5172 static unsigned
5173 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5175 /* We add size to the cost, so that we prefer eliminating ivs
5176 if possible. */
5177 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5178 data->body_includes_call);
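/* For ivopts_global_cost_for_size above, a sketch: on a target with
   plenty of free registers estimate_reg_pressure_cost stays small and
   the "+ size" term dominates, so a two-iv solution is charged roughly
   one unit more than an equivalent one-iv solution, nudging the search
   towards fewer ivs.  */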
5181 /* For each size of the induction variable set determine the penalty. */
5183 static void
5184 determine_set_costs (struct ivopts_data *data)
5186 unsigned j, n;
5187 gimple phi;
5188 gimple_stmt_iterator psi;
5189 tree op;
5190 struct loop *loop = data->current_loop;
5191 bitmap_iterator bi;
5193 if (dump_file && (dump_flags & TDF_DETAILS))
5195 fprintf (dump_file, "Global costs:\n");
5196 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5197 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5198 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5199 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5202 n = 0;
5203 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5205 phi = gsi_stmt (psi);
5206 op = PHI_RESULT (phi);
5208 if (virtual_operand_p (op))
5209 continue;
5211 if (get_iv (data, op))
5212 continue;
5214 n++;
5217 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5219 struct version_info *info = ver_info (data, j);
5221 if (info->inv_id && info->has_nonlin_use)
5222 n++;
5225 data->regs_used = n;
5226 if (dump_file && (dump_flags & TDF_DETAILS))
5227 fprintf (dump_file, " regs_used %d\n", n);
5229 if (dump_file && (dump_flags & TDF_DETAILS))
5231 fprintf (dump_file, " cost for size:\n");
5232 fprintf (dump_file, " ivs\tcost\n");
5233 for (j = 0; j <= 2 * target_avail_regs; j++)
5234 fprintf (dump_file, " %d\t%d\n", j,
5235 ivopts_global_cost_for_size (data, j));
5236 fprintf (dump_file, "\n");
5240 /* Returns true if A is a cheaper cost pair than B. */
5242 static bool
5243 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5245 int cmp;
5247 if (!a)
5248 return false;
5250 if (!b)
5251 return true;
5253 cmp = compare_costs (a->cost, b->cost);
5254 if (cmp < 0)
5255 return true;
5257 if (cmp > 0)
5258 return false;
5260 /* In case the costs are the same, prefer the cheaper candidate. */
5261 if (a->cand->cost < b->cand->cost)
5262 return true;
5264 return false;
5268 /* Returns the candidate by which USE is expressed in IVS. */
5270 static struct cost_pair *
5271 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5273 return ivs->cand_for_use[use->id];
5276 /* Computes the cost field of IVS structure. */
5278 static void
5279 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5281 comp_cost cost = ivs->cand_use_cost;
5283 cost.cost += ivs->cand_cost;
5285 cost.cost += ivopts_global_cost_for_size (data,
5286 ivs->n_regs + ivs->num_used_inv_expr);
5288 ivs->cost = cost;
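/* In other words, the total cost recomputed above is the sum of the
   per-use costs, plus the selected candidates' own costs, plus
   ivopts_global_cost_for_size applied to the number of registers
   needed for the ivs and invariants together with the number of
   distinct invariant expressions kept live.  */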
5291 /* Remove the invariants in set INVS from set IVS. */
5293 static void
5294 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5296 bitmap_iterator bi;
5297 unsigned iid;
5299 if (!invs)
5300 return;
5302 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5304 ivs->n_invariant_uses[iid]--;
5305 if (ivs->n_invariant_uses[iid] == 0)
5306 ivs->n_regs--;
5310 /* Set USE not to be expressed by any candidate in IVS. */
5312 static void
5313 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5314 struct iv_use *use)
5316 unsigned uid = use->id, cid;
5317 struct cost_pair *cp;
5319 cp = ivs->cand_for_use[uid];
5320 if (!cp)
5321 return;
5322 cid = cp->cand->id;
5324 ivs->bad_uses++;
5325 ivs->cand_for_use[uid] = NULL;
5326 ivs->n_cand_uses[cid]--;
5328 if (ivs->n_cand_uses[cid] == 0)
5330 bitmap_clear_bit (ivs->cands, cid);
5331 /* Do not count the pseudocandidates. */
5332 if (cp->cand->iv)
5333 ivs->n_regs--;
5334 ivs->n_cands--;
5335 ivs->cand_cost -= cp->cand->cost;
5337 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5340 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5342 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5344 if (cp->inv_expr_id != -1)
5346 ivs->used_inv_expr[cp->inv_expr_id]--;
5347 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5348 ivs->num_used_inv_expr--;
5350 iv_ca_recount_cost (data, ivs);
5353 /* Add invariants in set INVS to set IVS. */
5355 static void
5356 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5358 bitmap_iterator bi;
5359 unsigned iid;
5361 if (!invs)
5362 return;
5364 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5366 ivs->n_invariant_uses[iid]++;
5367 if (ivs->n_invariant_uses[iid] == 1)
5368 ivs->n_regs++;
5372 /* Set cost pair for USE in set IVS to CP. */
5374 static void
5375 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5376 struct iv_use *use, struct cost_pair *cp)
5378 unsigned uid = use->id, cid;
5380 if (ivs->cand_for_use[uid] == cp)
5381 return;
5383 if (ivs->cand_for_use[uid])
5384 iv_ca_set_no_cp (data, ivs, use);
5386 if (cp)
5388 cid = cp->cand->id;
5390 ivs->bad_uses--;
5391 ivs->cand_for_use[uid] = cp;
5392 ivs->n_cand_uses[cid]++;
5393 if (ivs->n_cand_uses[cid] == 1)
5395 bitmap_set_bit (ivs->cands, cid);
5396 /* Do not count the pseudocandidates. */
5397 if (cp->cand->iv)
5398 ivs->n_regs++;
5399 ivs->n_cands++;
5400 ivs->cand_cost += cp->cand->cost;
5402 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5405 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5406 iv_ca_set_add_invariants (ivs, cp->depends_on);
5408 if (cp->inv_expr_id != -1)
5410 ivs->used_inv_expr[cp->inv_expr_id]++;
5411 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5412 ivs->num_used_inv_expr++;
5414 iv_ca_recount_cost (data, ivs);
5418 /* Extend set IVS by expressing USE by some of the candidates in it
5419 if possible. All important candidates will be considered
5420 if IMPORTANT_CANDIDATES is true. */
5422 static void
5423 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5424 struct iv_use *use, bool important_candidates)
5426 struct cost_pair *best_cp = NULL, *cp;
5427 bitmap_iterator bi;
5428 bitmap cands;
5429 unsigned i;
5431 gcc_assert (ivs->upto >= use->id);
5433 if (ivs->upto == use->id)
5435 ivs->upto++;
5436 ivs->bad_uses++;
5439 cands = (important_candidates ? data->important_candidates : ivs->cands);
5440 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5442 struct iv_cand *cand = iv_cand (data, i);
5444 cp = get_use_iv_cost (data, use, cand);
5446 if (cheaper_cost_pair (cp, best_cp))
5447 best_cp = cp;
5450 iv_ca_set_cp (data, ivs, use, best_cp);
5453 /* Get cost for assignment IVS. */
5455 static comp_cost
5456 iv_ca_cost (struct iv_ca *ivs)
5458 /* This was a conditional expression but it triggered a bug in
5459 Sun C 5.5. */
5460 if (ivs->bad_uses)
5461 return infinite_cost;
5462 else
5463 return ivs->cost;
5466 /* Returns true if all dependences of CP are among invariants in IVS. */
5468 static bool
5469 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5471 unsigned i;
5472 bitmap_iterator bi;
5474 if (!cp->depends_on)
5475 return true;
5477 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5479 if (ivs->n_invariant_uses[i] == 0)
5480 return false;
5483 return true;
5486 /* Creates a change record for expressing USE by NEW_CP instead of OLD_CP,
5487 and chains it before NEXT_CHANGE. */
5489 static struct iv_ca_delta *
5490 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5491 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5493 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5495 change->use = use;
5496 change->old_cp = old_cp;
5497 change->new_cp = new_cp;
5498 change->next_change = next_change;
5500 return change;
5503 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5504 are rewritten. */
5506 static struct iv_ca_delta *
5507 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5509 struct iv_ca_delta *last;
5511 if (!l2)
5512 return l1;
5514 if (!l1)
5515 return l2;
5517 for (last = l1; last->next_change; last = last->next_change)
5518 continue;
5519 last->next_change = l2;
5521 return l1;
5524 /* Reverse the list of changes DELTA, forming the inverse to it. */
5526 static struct iv_ca_delta *
5527 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5529 struct iv_ca_delta *act, *next, *prev = NULL;
5530 struct cost_pair *tmp;
5532 for (act = delta; act; act = next)
5534 next = act->next_change;
5535 act->next_change = prev;
5536 prev = act;
5538 tmp = act->old_cp;
5539 act->old_cp = act->new_cp;
5540 act->new_cp = tmp;
5543 return prev;
5546 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5547 reverted instead. */
5549 static void
5550 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5551 struct iv_ca_delta *delta, bool forward)
5553 struct cost_pair *from, *to;
5554 struct iv_ca_delta *act;
5556 if (!forward)
5557 delta = iv_ca_delta_reverse (delta);
5559 for (act = delta; act; act = act->next_change)
5561 from = act->old_cp;
5562 to = act->new_cp;
5563 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5564 iv_ca_set_cp (data, ivs, act->use, to);
5567 if (!forward)
5568 iv_ca_delta_reverse (delta);
5571 /* Returns true if CAND is used in IVS. */
5573 static bool
5574 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5576 return ivs->n_cand_uses[cand->id] > 0;
5579 /* Returns number of induction variable candidates in the set IVS. */
5581 static unsigned
5582 iv_ca_n_cands (struct iv_ca *ivs)
5584 return ivs->n_cands;
5587 /* Free the list of changes DELTA. */
5589 static void
5590 iv_ca_delta_free (struct iv_ca_delta **delta)
5592 struct iv_ca_delta *act, *next;
5594 for (act = *delta; act; act = next)
5596 next = act->next_change;
5597 free (act);
5600 *delta = NULL;
5603 /* Allocates new iv candidates assignment. */
5605 static struct iv_ca *
5606 iv_ca_new (struct ivopts_data *data)
5608 struct iv_ca *nw = XNEW (struct iv_ca);
5610 nw->upto = 0;
5611 nw->bad_uses = 0;
5612 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5613 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5614 nw->cands = BITMAP_ALLOC (NULL);
5615 nw->n_cands = 0;
5616 nw->n_regs = 0;
5617 nw->cand_use_cost = no_cost;
5618 nw->cand_cost = 0;
5619 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5620 nw->cost = no_cost;
5621 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5622 nw->num_used_inv_expr = 0;
5624 return nw;
5627 /* Free memory occupied by the set IVS. */
5629 static void
5630 iv_ca_free (struct iv_ca **ivs)
5632 free ((*ivs)->cand_for_use);
5633 free ((*ivs)->n_cand_uses);
5634 BITMAP_FREE ((*ivs)->cands);
5635 free ((*ivs)->n_invariant_uses);
5636 free ((*ivs)->used_inv_expr);
5637 free (*ivs);
5638 *ivs = NULL;
5641 /* Dumps IVS to FILE. */
5643 static void
5644 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5646 const char *pref = " invariants ";
5647 unsigned i;
5648 comp_cost cost = iv_ca_cost (ivs);
5650 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5651 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5652 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5653 bitmap_print (file, ivs->cands, " candidates: ","\n");
5655 for (i = 0; i < ivs->upto; i++)
5657 struct iv_use *use = iv_use (data, i);
5658 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5659 if (cp)
5660 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5661 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5662 else
5663 fprintf (file, " use:%d --> ??\n", use->id);
5666 for (i = 1; i <= data->max_inv_id; i++)
5667 if (ivs->n_invariant_uses[i])
5669 fprintf (file, "%s%d", pref, i);
5670 pref = ", ";
5672 fprintf (file, "\n\n");
5675 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5676 of the new set, and store the differences in DELTA. The number of
5677 induction variables in the new set is stored to N_IVS. MIN_NCAND is a
5678 flag; when it is true, the function tries to minimize the number of ivs. */
5680 static comp_cost
5681 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5682 struct iv_cand *cand, struct iv_ca_delta **delta,
5683 unsigned *n_ivs, bool min_ncand)
5685 unsigned i;
5686 comp_cost cost;
5687 struct iv_use *use;
5688 struct cost_pair *old_cp, *new_cp;
5690 *delta = NULL;
5691 for (i = 0; i < ivs->upto; i++)
5693 use = iv_use (data, i);
5694 old_cp = iv_ca_cand_for_use (ivs, use);
5696 if (old_cp
5697 && old_cp->cand == cand)
5698 continue;
5700 new_cp = get_use_iv_cost (data, use, cand);
5701 if (!new_cp)
5702 continue;
5704 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5705 continue;
5707 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5708 continue;
5710 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5713 iv_ca_delta_commit (data, ivs, *delta, true);
5714 cost = iv_ca_cost (ivs);
5715 if (n_ivs)
5716 *n_ivs = iv_ca_n_cands (ivs);
5717 iv_ca_delta_commit (data, ivs, *delta, false);
5719 return cost;
5722 /* Try narrowing set IVS by removing CAND. Return the cost of
5723 the new set and store the differences in DELTA. START is
5724 the candidate with which we start narrowing. */
5726 static comp_cost
5727 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5728 struct iv_cand *cand, struct iv_cand *start,
5729 struct iv_ca_delta **delta)
5731 unsigned i, ci;
5732 struct iv_use *use;
5733 struct cost_pair *old_cp, *new_cp, *cp;
5734 bitmap_iterator bi;
5735 struct iv_cand *cnd;
5736 comp_cost cost, best_cost, acost;
5738 *delta = NULL;
5739 for (i = 0; i < n_iv_uses (data); i++)
5741 use = iv_use (data, i);
5743 old_cp = iv_ca_cand_for_use (ivs, use);
5744 if (old_cp->cand != cand)
5745 continue;
5747 best_cost = iv_ca_cost (ivs);
5748 /* Start narrowing with START. */
5749 new_cp = get_use_iv_cost (data, use, start);
5751 if (data->consider_all_candidates)
5753 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5755 if (ci == cand->id || (start && ci == start->id))
5756 continue;
5758 cnd = iv_cand (data, ci);
5760 cp = get_use_iv_cost (data, use, cnd);
5761 if (!cp)
5762 continue;
5764 iv_ca_set_cp (data, ivs, use, cp);
5765 acost = iv_ca_cost (ivs);
5767 if (compare_costs (acost, best_cost) < 0)
5769 best_cost = acost;
5770 new_cp = cp;
5774 else
5776 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5778 if (ci == cand->id || (start && ci == start->id))
5779 continue;
5781 cnd = iv_cand (data, ci);
5783 cp = get_use_iv_cost (data, use, cnd);
5784 if (!cp)
5785 continue;
5787 iv_ca_set_cp (data, ivs, use, cp);
5788 acost = iv_ca_cost (ivs);
5790 if (compare_costs (acost, best_cost) < 0)
5792 best_cost = acost;
5793 new_cp = cp;
5797 /* Restore to old cp for use. */
5798 iv_ca_set_cp (data, ivs, use, old_cp);
5800 if (!new_cp)
5802 iv_ca_delta_free (delta);
5803 return infinite_cost;
5806 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5809 iv_ca_delta_commit (data, ivs, *delta, true);
5810 cost = iv_ca_cost (ivs);
5811 iv_ca_delta_commit (data, ivs, *delta, false);
5813 return cost;
5816 /* Try optimizing the set of candidates IVS by removing candidates other
5817 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5818 differences in DELTA. */
5820 static comp_cost
5821 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5822 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5824 bitmap_iterator bi;
5825 struct iv_ca_delta *act_delta, *best_delta;
5826 unsigned i;
5827 comp_cost best_cost, acost;
5828 struct iv_cand *cand;
5830 best_delta = NULL;
5831 best_cost = iv_ca_cost (ivs);
5833 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5835 cand = iv_cand (data, i);
5837 if (cand == except_cand)
5838 continue;
5840 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5842 if (compare_costs (acost, best_cost) < 0)
5844 best_cost = acost;
5845 iv_ca_delta_free (&best_delta);
5846 best_delta = act_delta;
5848 else
5849 iv_ca_delta_free (&act_delta);
5852 if (!best_delta)
5854 *delta = NULL;
5855 return best_cost;
5858 /* Recurse to possibly remove other unnecessary ivs. */
5859 iv_ca_delta_commit (data, ivs, best_delta, true);
5860 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5861 iv_ca_delta_commit (data, ivs, best_delta, false);
5862 *delta = iv_ca_delta_join (best_delta, *delta);
5863 return best_cost;
5866 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5867 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5868 the corresponding cost_pair; otherwise just return BEST_CP. */
5870 static struct cost_pair*
5871 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5872 unsigned int cand_idx, struct iv_cand *old_cand,
5873 struct cost_pair *best_cp)
5875 struct iv_cand *cand;
5876 struct cost_pair *cp;
5878 gcc_assert (old_cand != NULL && best_cp != NULL);
5879 if (cand_idx == old_cand->id)
5880 return best_cp;
5882 cand = iv_cand (data, cand_idx);
5883 cp = get_use_iv_cost (data, use, cand);
5884 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5885 return cp;
5887 return best_cp;
5890 /* Try breaking the local optimal fixed-point for IVS by replacing
5891 candidates that are used by more than one iv use. For each of those
5892 candidates, this function tries to represent the iv uses under that
5893 candidate using other candidates with lower local cost, then tries to
5894 prune the new set. If the new set has lower cost, it returns the new
5895 cost after recording the candidate replacements in list DELTA. */
5897 static comp_cost
5898 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5899 struct iv_ca_delta **delta)
5901 bitmap_iterator bi, bj;
5902 unsigned int i, j, k;
5903 struct iv_use *use;
5904 struct iv_cand *cand;
5905 comp_cost orig_cost, acost;
5906 struct iv_ca_delta *act_delta, *tmp_delta;
5907 struct cost_pair *old_cp, *best_cp = NULL;
5909 *delta = NULL;
5910 orig_cost = iv_ca_cost (ivs);
5912 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5914 if (ivs->n_cand_uses[i] == 1
5915 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5916 continue;
5918 cand = iv_cand (data, i);
5920 act_delta = NULL;
5921 /* Represent uses under current candidate using other ones with
5922 lower local cost. */
5923 for (j = 0; j < ivs->upto; j++)
5925 use = iv_use (data, j);
5926 old_cp = iv_ca_cand_for_use (ivs, use);
5928 if (old_cp->cand != cand)
5929 continue;
5931 best_cp = old_cp;
5932 if (data->consider_all_candidates)
5933 for (k = 0; k < n_iv_cands (data); k++)
5934 best_cp = cheaper_cost_with_cand (data, use, k,
5935 old_cp->cand, best_cp);
5936 else
5937 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5938 best_cp = cheaper_cost_with_cand (data, use, k,
5939 old_cp->cand, best_cp);
5941 if (best_cp == old_cp)
5942 continue;
5944 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
5946 /* No need for further pruning. */
5947 if (!act_delta)
5948 continue;
5950 /* Prune the new candidate set. */
5951 iv_ca_delta_commit (data, ivs, act_delta, true);
5952 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
5953 iv_ca_delta_commit (data, ivs, act_delta, false);
5954 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5956 if (compare_costs (acost, orig_cost) < 0)
5958 *delta = act_delta;
5959 return acost;
5961 else
5962 iv_ca_delta_free (&act_delta);
5965 return orig_cost;
5968 /* Tries to extend the set IVS in the best possible way in order
5969 to express the USE. If ORIGINALP is true, prefer candidates from
5970 the original set of IVs, otherwise favor important candidates not
5971 based on any memory object. */
5973 static bool
5974 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5975 struct iv_use *use, bool originalp)
5977 comp_cost best_cost, act_cost;
5978 unsigned i;
5979 bitmap_iterator bi;
5980 struct iv_cand *cand;
5981 struct iv_ca_delta *best_delta = NULL, *act_delta;
5982 struct cost_pair *cp;
5984 iv_ca_add_use (data, ivs, use, false);
5985 best_cost = iv_ca_cost (ivs);
5987 cp = iv_ca_cand_for_use (ivs, use);
5988 if (!cp)
5990 ivs->upto--;
5991 ivs->bad_uses--;
5992 iv_ca_add_use (data, ivs, use, true);
5993 best_cost = iv_ca_cost (ivs);
5994 cp = iv_ca_cand_for_use (ivs, use);
5996 if (cp)
5998 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5999 iv_ca_set_no_cp (data, ivs, use);
6002 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6003 first try important candidates not based on any memory object. Only if
6004 this fails, try the specific ones. Rationale -- in loops with many
6005 variables the best choice is often to use just one generic biv. If we
6006 added here many ivs specific to the uses, the optimization algorithm later
6007 would be likely to get stuck in a local minimum, thus causing us to create
6008 too many ivs. The approach from few ivs to more seems more likely to be
6009 successful -- starting from few ivs, replacing an expensive use by a
6010 specific iv should always be a win. */
6011 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6013 cand = iv_cand (data, i);
6015 if (originalp && cand->pos != IP_ORIGINAL)
6016 continue;
6018 if (!originalp && cand->iv->base_object != NULL_TREE)
6019 continue;
6021 if (iv_ca_cand_used_p (ivs, cand))
6022 continue;
6024 cp = get_use_iv_cost (data, use, cand);
6025 if (!cp)
6026 continue;
6028 iv_ca_set_cp (data, ivs, use, cp);
6029 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6030 true);
6031 iv_ca_set_no_cp (data, ivs, use);
6032 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6034 if (compare_costs (act_cost, best_cost) < 0)
6036 best_cost = act_cost;
6038 iv_ca_delta_free (&best_delta);
6039 best_delta = act_delta;
6041 else
6042 iv_ca_delta_free (&act_delta);
6045 if (infinite_cost_p (best_cost))
6047 for (i = 0; i < use->n_map_members; i++)
6049 cp = use->cost_map + i;
6050 cand = cp->cand;
6051 if (!cand)
6052 continue;
6054 /* Already tried this. */
6055 if (cand->important)
6057 if (originalp && cand->pos == IP_ORIGINAL)
6058 continue;
6059 if (!originalp && cand->iv->base_object == NULL_TREE)
6060 continue;
6063 if (iv_ca_cand_used_p (ivs, cand))
6064 continue;
6066 act_delta = NULL;
6067 iv_ca_set_cp (data, ivs, use, cp);
6068 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6069 iv_ca_set_no_cp (data, ivs, use);
6070 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6071 cp, act_delta);
6073 if (compare_costs (act_cost, best_cost) < 0)
6075 best_cost = act_cost;
6077 if (best_delta)
6078 iv_ca_delta_free (&best_delta);
6079 best_delta = act_delta;
6081 else
6082 iv_ca_delta_free (&act_delta);
6086 iv_ca_delta_commit (data, ivs, best_delta, true);
6087 iv_ca_delta_free (&best_delta);
6089 return !infinite_cost_p (best_cost);
6092 /* Finds an initial assignment of candidates to uses. */
6094 static struct iv_ca *
6095 get_initial_solution (struct ivopts_data *data, bool originalp)
6097 struct iv_ca *ivs = iv_ca_new (data);
6098 unsigned i;
6100 for (i = 0; i < n_iv_uses (data); i++)
6101 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6103 iv_ca_free (&ivs);
6104 return NULL;
6107 return ivs;
6110 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6111 points to a bool variable; if it is true, this function tries to break
6112 a local optimal fixed-point by replacing candidates in IVS. */
6114 static bool
6115 try_improve_iv_set (struct ivopts_data *data,
6116 struct iv_ca *ivs, bool *try_replace_p)
6118 unsigned i, n_ivs;
6119 comp_cost acost, best_cost = iv_ca_cost (ivs);
6120 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6121 struct iv_cand *cand;
6123 /* Try extending the set of induction variables by one. */
6124 for (i = 0; i < n_iv_cands (data); i++)
6126 cand = iv_cand (data, i);
6128 if (iv_ca_cand_used_p (ivs, cand))
6129 continue;
6131 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6132 if (!act_delta)
6133 continue;
6135 /* If we successfully added the candidate and the set is small enough,
6136 try optimizing it by removing other candidates. */
6137 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6139 iv_ca_delta_commit (data, ivs, act_delta, true);
6140 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6141 iv_ca_delta_commit (data, ivs, act_delta, false);
6142 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6145 if (compare_costs (acost, best_cost) < 0)
6147 best_cost = acost;
6148 iv_ca_delta_free (&best_delta);
6149 best_delta = act_delta;
6151 else
6152 iv_ca_delta_free (&act_delta);
6155 if (!best_delta)
6157 /* Try removing the candidates from the set instead. */
6158 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6160 if (!best_delta && *try_replace_p)
6162 *try_replace_p = false;
6163 /* So far the candidate-selection algorithm tends to choose fewer IVs,
6164 so that it can handle cases in which loops have many variables
6165 but the best choice is often to use only one general biv. One
6166 weakness is that it cannot handle the opposite case, in which
6167 different candidates should be chosen with respect to each use. To
6168 solve the problem, we replace candidates in the manner described in
6169 the comments of iv_ca_replace, thus giving the general algorithm a
6170 chance to break the local optimal fixed-point in these cases. */
6171 best_cost = iv_ca_replace (data, ivs, &best_delta);
6174 if (!best_delta)
6175 return false;
6178 iv_ca_delta_commit (data, ivs, best_delta, true);
6179 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6180 iv_ca_delta_free (&best_delta);
6181 return true;
6184 /* Attempts to find the optimal set of induction variables. We use a simple
6185 greedy heuristic -- we try to replace at most one candidate in the selected
6186 solution and remove the unused ivs while this improves the cost. */
6188 static struct iv_ca *
6189 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6191 struct iv_ca *set;
6192 bool try_replace_p = true;
6194 /* Get the initial solution. */
6195 set = get_initial_solution (data, originalp);
6196 if (!set)
6198 if (dump_file && (dump_flags & TDF_DETAILS))
6199 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6200 return NULL;
6203 if (dump_file && (dump_flags & TDF_DETAILS))
6205 fprintf (dump_file, "Initial set of candidates:\n");
6206 iv_ca_dump (data, dump_file, set);
6209 while (try_improve_iv_set (data, set, &try_replace_p))
6211 if (dump_file && (dump_flags & TDF_DETAILS))
6213 fprintf (dump_file, "Improved to:\n");
6214 iv_ca_dump (data, dump_file, set);
6218 return set;
6221 static struct iv_ca *
6222 find_optimal_iv_set (struct ivopts_data *data)
6224 unsigned i;
6225 struct iv_ca *set, *origset;
6226 struct iv_use *use;
6227 comp_cost cost, origcost;
6229 /* Determine the cost based on a strategy that starts with original IVs,
6230 and try again using a strategy that prefers candidates not based
6231 on any memory object. */
6232 origset = find_optimal_iv_set_1 (data, true);
6233 set = find_optimal_iv_set_1 (data, false);
6235 if (!origset && !set)
6236 return NULL;
6238 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6239 cost = set ? iv_ca_cost (set) : infinite_cost;
6241 if (dump_file && (dump_flags & TDF_DETAILS))
6243 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6244 origcost.cost, origcost.complexity);
6245 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6246 cost.cost, cost.complexity);
6249 /* Choose the one with the best cost. */
6250 if (compare_costs (origcost, cost) <= 0)
6252 if (set)
6253 iv_ca_free (&set);
6254 set = origset;
6256 else if (origset)
6257 iv_ca_free (&origset);
6259 for (i = 0; i < n_iv_uses (data); i++)
6261 use = iv_use (data, i);
6262 use->selected = iv_ca_cand_for_use (set, use)->cand;
6265 return set;
6268 /* Creates a new induction variable corresponding to CAND. */
6270 static void
6271 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6273 gimple_stmt_iterator incr_pos;
6274 tree base;
6275 bool after = false;
6277 if (!cand->iv)
6278 return;
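/* The candidate's position determines where the new increment is
   emitted: just before the exit test (IP_NORMAL), at the end of the
   latch (IP_END), next to the selected use (IP_BEFORE_USE /
   IP_AFTER_USE), or nowhere because the original biv and its
   increment are reused (IP_ORIGINAL). */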
6280 switch (cand->pos)
6282 case IP_NORMAL:
6283 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6284 break;
6286 case IP_END:
6287 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6288 after = true;
6289 break;
6291 case IP_AFTER_USE:
6292 after = true;
6293 /* fall through */
6294 case IP_BEFORE_USE:
6295 incr_pos = gsi_for_stmt (cand->incremented_at);
6296 break;
6298 case IP_ORIGINAL:
6299 /* Mark that the iv is preserved. */
6300 name_info (data, cand->var_before)->preserve_biv = true;
6301 name_info (data, cand->var_after)->preserve_biv = true;
6303 /* Rewrite the increment so that it uses var_before directly. */
6304 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6305 return;
6308 gimple_add_tmp_var (cand->var_before);
6310 base = unshare_expr (cand->iv->base);
6312 create_iv (base, unshare_expr (cand->iv->step),
6313 cand->var_before, data->current_loop,
6314 &incr_pos, after, &cand->var_before, &cand->var_after);
6317 /* Creates new induction variables described in SET. */
6319 static void
6320 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6322 unsigned i;
6323 struct iv_cand *cand;
6324 bitmap_iterator bi;
6326 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6328 cand = iv_cand (data, i);
6329 create_new_iv (data, cand);
6332 if (dump_file && (dump_flags & TDF_DETAILS))
6334 fprintf (dump_file, "\nSelected IV set: \n");
6335 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6337 cand = iv_cand (data, i);
6338 dump_cand (dump_file, cand);
6340 fprintf (dump_file, "\n");
6344 /* Rewrites USE (definition of iv used in a nonlinear expression)
6345 using candidate CAND. */
6347 static void
6348 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6349 struct iv_use *use, struct iv_cand *cand)
6351 tree comp;
6352 tree op, tgt;
6353 gimple ass;
6354 gimple_stmt_iterator bsi;
6356 /* An important special case -- if we are asked to express the value
6357 of the original iv by itself, just exit; there is no need to
6358 introduce a new computation (that might also need casting the
6359 variable to unsigned and back). */
6360 if (cand->pos == IP_ORIGINAL
6361 && cand->incremented_at == use->stmt)
6363 enum tree_code stmt_code;
6365 gcc_assert (is_gimple_assign (use->stmt));
6366 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6368 /* Check whether we may leave the computation unchanged.
6369 This is the case only if it does not rely on other
6370 computations in the loop -- otherwise, the computation
6371 we rely upon may be removed in remove_unused_ivs,
6372 thus leading to an ICE. */
6373 stmt_code = gimple_assign_rhs_code (use->stmt);
6374 if (stmt_code == PLUS_EXPR
6375 || stmt_code == MINUS_EXPR
6376 || stmt_code == POINTER_PLUS_EXPR)
6378 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6379 op = gimple_assign_rhs2 (use->stmt);
6380 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6381 op = gimple_assign_rhs1 (use->stmt);
6382 else
6383 op = NULL_TREE;
6385 else
6386 op = NULL_TREE;
6388 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6389 return;
6392 comp = get_computation (data->current_loop, use, cand);
6393 gcc_assert (comp != NULL_TREE);
6395 switch (gimple_code (use->stmt))
6397 case GIMPLE_PHI:
6398 tgt = PHI_RESULT (use->stmt);
6400 /* If we should keep the biv, do not replace it. */
6401 if (name_info (data, tgt)->preserve_biv)
6402 return;
6404 bsi = gsi_after_labels (gimple_bb (use->stmt));
6405 break;
6407 case GIMPLE_ASSIGN:
6408 tgt = gimple_assign_lhs (use->stmt);
6409 bsi = gsi_for_stmt (use->stmt);
6410 break;
6412 default:
6413 gcc_unreachable ();
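/* Emit COMP as a separate computation if it is not a valid GIMPLE
   rhs, or if rewriting the (non-PHI) statement in place would need
   more operand slots than the statement currently has, which would
   force a reallocation. */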
6416 if (!valid_gimple_rhs_p (comp)
6417 || (gimple_code (use->stmt) != GIMPLE_PHI
6418 /* We can't allow re-allocating the stmt, as it might still
6419 be pointed to. */
6420 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6421 >= gimple_num_ops (gsi_stmt (bsi)))))
6423 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6424 true, GSI_SAME_STMT);
6425 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6427 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6428 /* As this isn't a plain copy we have to reset alignment
6429 information. */
6430 if (SSA_NAME_PTR_INFO (comp))
6431 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6435 if (gimple_code (use->stmt) == GIMPLE_PHI)
6437 ass = gimple_build_assign (tgt, comp);
6438 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6440 bsi = gsi_for_stmt (use->stmt);
6441 remove_phi_node (&bsi, false);
6443 else
6445 gimple_assign_set_rhs_from_tree (&bsi, comp);
6446 use->stmt = gsi_stmt (bsi);
6450 /* Performs a peephole optimization, reordering the iv update statement and
6451 a mem ref so as to enable instruction combining in later phases. The mem
6452 ref uses the iv value before the update, so the reordering transformation
6453 requires adjustment of the offset. CAND is the selected IV_CAND.
6455 Example:
6457 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6458 iv2 = iv1 + 1;
6460 if (t < val) (1)
6461 goto L;
6462 goto Head;
6465 Directly propagating t over to (1) would introduce an overlapping live
6466 range and thus increase register pressure. This peephole transforms it into:
6469 iv2 = iv1 + 1;
6470 t = MEM_REF (base, iv2, 8, 8);
6471 if (t < val)
6472 goto L;
6473 goto Head;
6476 static void
6477 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6479 tree var_after;
6480 gimple iv_update, stmt;
6481 basic_block bb;
6482 gimple_stmt_iterator gsi, gsi_iv;
6484 if (cand->pos != IP_NORMAL)
6485 return;
6487 var_after = cand->var_after;
6488 iv_update = SSA_NAME_DEF_STMT (var_after);
6490 bb = gimple_bb (iv_update);
6491 gsi = gsi_last_nondebug_bb (bb);
6492 stmt = gsi_stmt (gsi);
6494 /* Only handle conditional statements for now. */
6495 if (gimple_code (stmt) != GIMPLE_COND)
6496 return;
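/* For the reordering to be safe, the block must end with exactly
   USE->stmt, the iv update and the conditional, in that order,
   ignoring debug statements; otherwise punt. */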
6498 gsi_prev_nondebug (&gsi);
6499 stmt = gsi_stmt (gsi);
6500 if (stmt != iv_update)
6501 return;
6503 gsi_prev_nondebug (&gsi);
6504 if (gsi_end_p (gsi))
6505 return;
6507 stmt = gsi_stmt (gsi);
6508 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6509 return;
6511 if (stmt != use->stmt)
6512 return;
6514 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6515 return;
6517 if (dump_file && (dump_flags & TDF_DETAILS))
6519 fprintf (dump_file, "Reordering \n");
6520 print_gimple_stmt (dump_file, iv_update, 0, 0);
6521 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6522 fprintf (dump_file, "\n");
6525 gsi = gsi_for_stmt (use->stmt);
6526 gsi_iv = gsi_for_stmt (iv_update);
6527 gsi_move_before (&gsi_iv, &gsi);
6529 cand->pos = IP_BEFORE_USE;
6530 cand->incremented_at = use->stmt;
6533 /* Rewrites USE (an address that is an iv) using candidate CAND. */
6535 static void
6536 rewrite_use_address (struct ivopts_data *data,
6537 struct iv_use *use, struct iv_cand *cand)
6539 aff_tree aff;
6540 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6541 tree base_hint = NULL_TREE;
6542 tree ref, iv;
6543 bool ok;
6545 adjust_iv_update_pos (cand, use);
6546 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6547 gcc_assert (ok);
6548 unshare_aff_combination (&aff);
6550 /* To avoid undefined overflow problems, all IV candidates use unsigned
6551 integer types. The drawback is that this makes it impossible for
6552 create_mem_ref to distinguish an IV that is based on a memory object
6553 from one that represents simply an offset.
6555 To work around this problem, we pass a hint to create_mem_ref that
6556 indicates which variable (if any) in aff is an IV based on a memory
6557 object. Note that we only consider the candidate. If this is not
6558 based on an object, the base of the reference is in some subexpression
6559 of the use -- but these will use pointer types, so they are recognized
6560 by the create_mem_ref heuristics anyway. */
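/* Illustrative sketch (not part of the original sources): for a use
   such as *(p + 4 * i), the selected candidate may be an unsigned
   IV whose base object is the pointer p.  Passing its value at the
   use as BASE_HINT lets create_mem_ref build the reference around
   that pointer instead of treating the value as a plain integer
   offset. */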
6561 if (cand->iv->base_object)
6562 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6564 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6565 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6566 reference_alias_ptr_type (*use->op_p),
6567 iv, base_hint, data->speed);
6568 copy_ref_info (ref, *use->op_p);
6569 *use->op_p = ref;
6572 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6573 candidate CAND. */
6575 static void
6576 rewrite_use_compare (struct ivopts_data *data,
6577 struct iv_use *use, struct iv_cand *cand)
6579 tree comp, *var_p, op, bound;
6580 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6581 enum tree_code compare;
6582 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6583 bool ok;
6585 bound = cp->value;
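/* A non-NULL bound means the comparison can be replaced entirely:
   the exit test is rewritten to compare the candidate against the
   bound, e.g. turning "if (i < n)" into "if (ivtmp != bound)"
   (illustrative names). */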
6586 if (bound)
6588 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6589 tree var_type = TREE_TYPE (var);
6590 gimple_seq stmts;
6592 if (dump_file && (dump_flags & TDF_DETAILS))
6594 fprintf (dump_file, "Replacing exit test: ");
6595 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6597 compare = cp->comp;
6598 bound = unshare_expr (fold_convert (var_type, bound));
6599 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6600 if (stmts)
6601 gsi_insert_seq_on_edge_immediate (
6602 loop_preheader_edge (data->current_loop),
6603 stmts);
6605 gimple_cond_set_lhs (use->stmt, var);
6606 gimple_cond_set_code (use->stmt, compare);
6607 gimple_cond_set_rhs (use->stmt, op);
6608 return;
6611 /* The induction variable elimination failed; just express the original
6612 giv. */
6613 comp = get_computation (data->current_loop, use, cand);
6614 gcc_assert (comp != NULL_TREE);
6616 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6617 gcc_assert (ok);
6619 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6620 true, GSI_SAME_STMT);
6623 /* Rewrites USE using candidate CAND. */
6625 static void
6626 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6628 switch (use->type)
6630 case USE_NONLINEAR_EXPR:
6631 rewrite_use_nonlinear_expr (data, use, cand);
6632 break;
6634 case USE_ADDRESS:
6635 rewrite_use_address (data, use, cand);
6636 break;
6638 case USE_COMPARE:
6639 rewrite_use_compare (data, use, cand);
6640 break;
6642 default:
6643 gcc_unreachable ();
6646 update_stmt (use->stmt);
6649 /* Rewrite the uses using the selected induction variables. */
6651 static void
6652 rewrite_uses (struct ivopts_data *data)
6654 unsigned i;
6655 struct iv_cand *cand;
6656 struct iv_use *use;
6658 for (i = 0; i < n_iv_uses (data); i++)
6660 use = iv_use (data, i);
6661 cand = use->selected;
6662 gcc_assert (cand);
6664 rewrite_use (data, use, cand);
6668 /* Removes the ivs that are not used after rewriting. */
6670 static void
6671 remove_unused_ivs (struct ivopts_data *data)
6673 unsigned j;
6674 bitmap_iterator bi;
6675 bitmap toremove = BITMAP_ALLOC (NULL);
6677 /* Figure out an order in which to release SSA DEFs so that we don't
6678 release something that we'd have to propagate into a debug stmt
6679 afterwards. */
6680 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6682 struct version_info *info;
6684 info = ver_info (data, j);
6685 if (info->iv
6686 && !integer_zerop (info->iv->step)
6687 && !info->inv_id
6688 && !info->iv->have_use_for
6689 && !info->preserve_biv)
6691 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6693 tree def = info->iv->ssa_name;
6695 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6697 imm_use_iterator imm_iter;
6698 use_operand_p use_p;
6699 gimple stmt;
6700 int count = 0;
6702 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6704 if (!gimple_debug_bind_p (stmt))
6705 continue;
6707 /* We just want to determine whether to do nothing
6708 (count == 0), to substitute the computed
6709 expression into a single use of the SSA DEF by
6710 itself (count == 1), or to use a debug temp
6711 because the SSA DEF is used multiple times or as
6712 part of a larger expression (count > 1). */
6713 count++;
6714 if (gimple_debug_bind_get_value (stmt) != def)
6715 count++;
6717 if (count > 1)
6718 BREAK_FROM_IMM_USE_STMT (imm_iter);
6721 if (!count)
6722 continue;
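/* Fabricate a use of the to-be-removed iv so that get_computation_at
   below can express its value in terms of a remaining candidate for
   the sake of the debug statements. */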
6724 struct iv_use dummy_use;
6725 struct iv_cand *best_cand = NULL, *cand;
6726 unsigned i, best_pref = 0, cand_pref;
6728 memset (&dummy_use, 0, sizeof (dummy_use));
6729 dummy_use.iv = info->iv;
6730 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6732 cand = iv_use (data, i)->selected;
6733 if (cand == best_cand)
6734 continue;
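/* Score the candidate: a matching step counts 4, a matching mode
   counts 2 and a constant base counts 1, so a step match dominates
   the other criteria. */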
6735 cand_pref = operand_equal_p (cand->iv->step,
6736 info->iv->step, 0)
6737 ? 4 : 0;
6738 cand_pref
6739 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6740 == TYPE_MODE (TREE_TYPE (info->iv->base))
6741 ? 2 : 0;
6742 cand_pref
6743 += TREE_CODE (cand->iv->base) == INTEGER_CST
6744 ? 1 : 0;
6745 if (best_cand == NULL || best_pref < cand_pref)
6747 best_cand = cand;
6748 best_pref = cand_pref;
6752 if (!best_cand)
6753 continue;
6755 tree comp = get_computation_at (data->current_loop,
6756 &dummy_use, best_cand,
6757 SSA_NAME_DEF_STMT (def));
6758 if (!comp)
6759 continue;
6761 if (count > 1)
6763 tree vexpr = make_node (DEBUG_EXPR_DECL);
6764 DECL_ARTIFICIAL (vexpr) = 1;
6765 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6766 if (SSA_NAME_VAR (def))
6767 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6768 else
6769 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6770 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6771 gimple_stmt_iterator gsi;
6773 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6774 gsi = gsi_after_labels (gimple_bb
6775 (SSA_NAME_DEF_STMT (def)));
6776 else
6777 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6779 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6780 comp = vexpr;
6783 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6785 if (!gimple_debug_bind_p (stmt))
6786 continue;
6788 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6789 SET_USE (use_p, comp);
6791 update_stmt (stmt);
6797 release_defs_bitset (toremove);
6799 BITMAP_FREE (toremove);
6802 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6803 for pointer_map_traverse. */
6805 static bool
6806 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6807 void *data ATTRIBUTE_UNUSED)
6809 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6811 free (niter);
6812 return true;
6815 /* Frees data allocated by the optimization of a single loop. */
6817 static void
6818 free_loop_data (struct ivopts_data *data)
6820 unsigned i, j;
6821 bitmap_iterator bi;
6822 tree obj;
6824 if (data->niters)
6826 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6827 pointer_map_destroy (data->niters);
6828 data->niters = NULL;
6831 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6833 struct version_info *info;
6835 info = ver_info (data, i);
6836 free (info->iv);
6837 info->iv = NULL;
6838 info->has_nonlin_use = false;
6839 info->preserve_biv = false;
6840 info->inv_id = 0;
6842 bitmap_clear (data->relevant);
6843 bitmap_clear (data->important_candidates);
6845 for (i = 0; i < n_iv_uses (data); i++)
6847 struct iv_use *use = iv_use (data, i);
6849 free (use->iv);
6850 BITMAP_FREE (use->related_cands);
6851 for (j = 0; j < use->n_map_members; j++)
6852 if (use->cost_map[j].depends_on)
6853 BITMAP_FREE (use->cost_map[j].depends_on);
6854 free (use->cost_map);
6855 free (use);
6857 data->iv_uses.truncate (0);
6859 for (i = 0; i < n_iv_cands (data); i++)
6861 struct iv_cand *cand = iv_cand (data, i);
6863 free (cand->iv);
6864 if (cand->depends_on)
6865 BITMAP_FREE (cand->depends_on);
6866 free (cand);
6868 data->iv_candidates.truncate (0);
6870 if (data->version_info_size < num_ssa_names)
6872 data->version_info_size = 2 * num_ssa_names;
6873 free (data->version_info);
6874 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6877 data->max_inv_id = 0;
6879 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6880 SET_DECL_RTL (obj, NULL_RTX);
6882 decl_rtl_to_reset.truncate (0);
6884 data->inv_expr_tab.empty ();
6885 data->inv_expr_id = 0;
6888 /* Finalizes data structures used by the iv optimization pass. */
6891 static void
6892 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6894 free_loop_data (data);
6895 free (data->version_info);
6896 BITMAP_FREE (data->relevant);
6897 BITMAP_FREE (data->important_candidates);
6899 decl_rtl_to_reset.release ();
6900 data->iv_uses.release ();
6901 data->iv_candidates.release ();
6902 data->inv_expr_tab.dispose ();
6905 /* Returns true if the loop body BODY includes any function calls other
6906 than inexpensive builtins. */
6907 static bool
6908 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6910 gimple_stmt_iterator gsi;
6911 unsigned i;
6913 for (i = 0; i < num_nodes; i++)
6914 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6916 gimple stmt = gsi_stmt (gsi);
6917 if (is_gimple_call (stmt)
6918 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6919 return true;
6921 return false;
6924 /* Optimizes the LOOP. Returns true if anything changed. */
6926 static bool
6927 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6929 bool changed = false;
6930 struct iv_ca *iv_ca;
6931 edge exit = single_dom_exit (loop);
6932 basic_block *body;
6934 gcc_assert (!data->niters);
6935 data->current_loop = loop;
6936 data->speed = optimize_loop_for_speed_p (loop);
6938 if (dump_file && (dump_flags & TDF_DETAILS))
6940 fprintf (dump_file, "Processing loop %d\n", loop->num);
6942 if (exit)
6944 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6945 exit->src->index, exit->dest->index);
6946 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6947 fprintf (dump_file, "\n");
6950 fprintf (dump_file, "\n");
6953 body = get_loop_body (loop);
6954 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6955 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6956 free (body);
6958 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6960 /* For each ssa name, determine whether it behaves as an induction
6961 variable in some loop. */
6962 if (!find_induction_variables (data))
6963 goto finish;
6965 /* Finds interesting uses (item 1). */
6966 find_interesting_uses (data);
6967 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6968 goto finish;
6970 /* Finds candidates for the induction variables (item 2). */
6971 find_iv_candidates (data);
6973 /* Calculates the costs (item 3, part 1). */
6974 determine_iv_costs (data);
6975 determine_use_iv_costs (data);
6976 determine_set_costs (data);
6978 /* Find the optimal set of induction variables (item 3, part 2). */
6979 iv_ca = find_optimal_iv_set (data);
6980 if (!iv_ca)
6981 goto finish;
6982 changed = true;
6984 /* Create the new induction variables (item 4, part 1). */
6985 create_new_ivs (data, iv_ca);
6986 iv_ca_free (&iv_ca);
6988 /* Rewrite the uses (item 4, part 2). */
6989 rewrite_uses (data);
6991 /* Remove the ivs that are unused after rewriting. */
6992 remove_unused_ivs (data);
6994 /* We have changed the structure of induction variables; it might happen
6995 that definitions in the scev database refer to some of them that were
6996 eliminated. */
6997 scev_reset ();
6999 finish:
7000 free_loop_data (data);
7002 return changed;
7005 /* Main entry point. Optimizes induction variables in loops. */
7007 void
7008 tree_ssa_iv_optimize (void)
7010 struct loop *loop;
7011 struct ivopts_data data;
7013 tree_ssa_iv_optimize_init (&data);
7015 /* Optimize the loops starting with the innermost ones. */
7016 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7018 if (dump_file && (dump_flags & TDF_DETAILS))
7019 flow_loop_dump (loop, dump_file, NULL, 1);
7021 tree_ssa_iv_optimize_loop (&data, loop);
7024 tree_ssa_iv_optimize_finalize (&data);