/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
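/* As an illustration only (this example is not taken from the surrounding
   code): given a source loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the address a + i * sizeof (*a) is an interesting address use, i is a
   candidate (the original biv), and a pointer iv stepping by sizeof (*a)
   is a derived candidate.  If the cost model selects the pointer iv, the
   loop is rewritten along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   which strength-reduces the multiplication away and, if i is otherwise
   unused, eliminates the original counter as well.  */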
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = max_stmt_executions_int (loop);
      if (niter == -1 || niter > AVG_LOOP_NITER (loop))
	return AVG_LOOP_NITER (loop);
    }

  return niter;
}
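/* For instance (illustrative only): with profile data estimating 1000
   executions, avg_loop_niter returns 1000 unchanged; without profile data,
   a computed maximum of 3 iterations is returned as-is, while a maximum of
   100, or no known bound at all, is capped to AVG_LOOP_NITER, i.e. 5.
   Note the cap only applies on the max_stmt_executions_int fallback.  */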
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The id of the use this iv is recorded for, valid
			   when have_use_for is set.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost = {0, 0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
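/* Illustrative examples (not exhaustive): for EXPR == &a[i] the base
   address of a[i] is a, so (void *) &a is returned; for EXPR == p + 4
   the function recurses on p, and a plain pointer ssa name falls through
   to the default case, where the pointer value itself serves as the base
   object; an integer constant pointer yields NULL_TREE, i.e. no
   particular object.  */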
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we would stop at any ssa var, but expanding is
	 expensive and such cases are unusual, so we only do it for the
	 first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we would stop at any ssa var, but expanding is
     expensive and such cases are unusual, so we only do it for the
     first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}
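/* For instance (illustrative only): address uses a[i], a[i + 3] and
   a[i + 1] share the same stripped base and step, so they end up in one
   group whose sub use list is kept sorted by constant offset -- 0, then
   1 * sizeof (*a), then 3 * sizeof (*a).  The cost model can then consider
   expressing every member of the group from a single pointer iv plus a
   small constant displacement.  */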
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
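/* For example (illustrative only): for an exit test i_1 < n_2 where i_1
   is an iv with nonzero step and n_2 is loop invariant, *CONTROL_VAR is
   set to the location of i_1, *BOUND to that of n_2, and true is
   returned.  For the mirrored test n_2 < i_1 the operands are first
   swapped so the iv still ends up in *CONTROL_VAR.  */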
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
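/* For example (illustrative only): with a biv i_1 and the statements

     t_2 = (long) i_1;
     t_3 = t_2 * 4;

   asking for the deriving biv of t_3 traces t_3 through the MULT_EXPR to
   t_2, through the conversion to i_1, and returns i_1's iv, since that is
   the first biv reached on the definition chain.  */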
/* Record that BIV is used in an address type use, together with any biv
   that is its predecessor or successor (i.e. differs from it by exactly
   one step).  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
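/* A few illustrative cases (not from the surrounding code): with
   TOP = n * 4 + n and BOT = n, MUL is set to 5 (4 from the MULT_EXPR arm
   plus 1 from the equal-operands base case); with TOP = n * 4 - n, MUL is
   set to 3; with TOP = 12 and BOT = 8 the function returns false, since
   the truncating division leaves a nonzero remainder.  */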
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
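/* Worked example (illustrative only): for a reference of a type with a
   64-bit alignment requirement and a STEP known to be a multiple of 4
   bytes but not of 8 (tree_ctz (step) == 2), the last test computes
   (1U << 2) * BITS_PER_UNIT == 32 < 64, so even a currently aligned
   access could drift to an unaligned address after some number of steps,
   and true is returned.  */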
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has same stripped iv base and step,
   this function records this one as a sub use to that; otherwise records
   it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
2239 /* Finds and records invariants used in STMT. */
2241 static void
2242 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2244 ssa_op_iter iter;
2245 use_operand_p use_p;
2246 tree op;
2248 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2250 op = USE_FROM_PTR (use_p);
2251 record_invariant (data, op, false);
2255 /* Finds interesting uses of induction variables in the statement STMT. */
2257 static void
2258 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2260 struct iv *iv;
2261 tree op, *lhs, *rhs;
2262 ssa_op_iter iter;
2263 use_operand_p use_p;
2264 enum tree_code code;
2266 find_invariants_stmt (data, stmt);
2268 if (gimple_code (stmt) == GIMPLE_COND)
2270 find_interesting_uses_cond (data, stmt);
2271 return;
2274 if (is_gimple_assign (stmt))
2276 lhs = gimple_assign_lhs_ptr (stmt);
2277 rhs = gimple_assign_rhs1_ptr (stmt);
2279 if (TREE_CODE (*lhs) == SSA_NAME)
2281 /* If the statement defines an induction variable, the uses are not
2282 interesting by themselves. */
2284 iv = get_iv (data, *lhs);
2286 if (iv && !integer_zerop (iv->step))
2287 return;
2290 code = gimple_assign_rhs_code (stmt);
2291 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2292 && (REFERENCE_CLASS_P (*rhs)
2293 || is_gimple_val (*rhs)))
2295 if (REFERENCE_CLASS_P (*rhs))
2296 find_interesting_uses_address (data, stmt, rhs);
2297 else
2298 find_interesting_uses_op (data, *rhs);
2300 if (REFERENCE_CLASS_P (*lhs))
2301 find_interesting_uses_address (data, stmt, lhs);
2302 return;
2304 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2306 find_interesting_uses_cond (data, stmt);
2307 return;
2310 /* TODO -- we should also handle address uses of type
2312 memory = call (whatever);
2316 call (memory). */
2319 if (gimple_code (stmt) == GIMPLE_PHI
2320 && gimple_bb (stmt) == data->current_loop->header)
2322 iv = get_iv (data, PHI_RESULT (stmt));
2324 if (iv && !integer_zerop (iv->step))
2325 return;
2328 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2330 op = USE_FROM_PTR (use_p);
2332 if (TREE_CODE (op) != SSA_NAME)
2333 continue;
2335 iv = get_iv (data, op);
2336 if (!iv)
2337 continue;
2339 find_interesting_uses_op (data, op);
2343 /* Finds interesting uses of induction variables outside of loops
2344 on loop exit edge EXIT. */
2346 static void
2347 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2349 gphi *phi;
2350 gphi_iterator psi;
2351 tree def;
2353 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2355 phi = psi.phi ();
2356 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2357 if (!virtual_operand_p (def))
2358 find_interesting_uses_op (data, def);
2362 /* Finds uses of the induction variables that are interesting. */
2364 static void
2365 find_interesting_uses (struct ivopts_data *data)
2367 basic_block bb;
2368 gimple_stmt_iterator bsi;
2369 basic_block *body = get_loop_body (data->current_loop);
2370 unsigned i;
2371 struct version_info *info;
2372 edge e;
2374 if (dump_file && (dump_flags & TDF_DETAILS))
2375 fprintf (dump_file, "Uses:\n\n");
2377 for (i = 0; i < data->current_loop->num_nodes; i++)
2379 edge_iterator ei;
2380 bb = body[i];
2382 FOR_EACH_EDGE (e, ei, bb->succs)
2383 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2384 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2385 find_interesting_uses_outside (data, e);
2387 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2388 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2389 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2390 if (!is_gimple_debug (gsi_stmt (bsi)))
2391 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2396 bitmap_iterator bi;
2398 fprintf (dump_file, "\n");
2400 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2402 info = ver_info (data, i);
2403 if (info->inv_id)
2405 fprintf (dump_file, " ");
2406 print_generic_expr (dump_file, info->name, TDF_SLIM);
2407 fprintf (dump_file, " is invariant (%d)%s\n",
2408 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2412 fprintf (dump_file, "\n");
2415 free (body);
2418 /* Compute the maximum offset of the [base + offset] addressing mode
2419 for the memory reference represented by USE. */
2421 static HOST_WIDE_INT
2422 compute_max_addr_offset (struct iv_use *use)
2424 int width;
2425 rtx reg, addr;
2426 HOST_WIDE_INT i, off;
2427 unsigned list_index, num;
2428 addr_space_t as;
2429 machine_mode mem_mode, addr_mode;
2430 static vec<HOST_WIDE_INT> max_offset_list;
2432 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2433 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2435 num = max_offset_list.length ();
2436 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2437 if (list_index >= num)
2439 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2440 for (; num < max_offset_list.length (); num++)
2441 max_offset_list[num] = -1;
2444 off = max_offset_list[list_index];
2445 if (off != -1)
2446 return off;
2448 addr_mode = targetm.addr_space.address_mode (as);
2449 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2450 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2452 width = GET_MODE_BITSIZE (addr_mode) - 1;
2453 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2454 width = HOST_BITS_PER_WIDE_INT - 1;
2456 for (i = width; i > 0; i--)
2458 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2459 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2460 if (memory_address_addr_space_p (mem_mode, addr, as))
2461 break;
2463 /* For some strict-alignment targets, the offset must be naturally
2464 aligned. Try an aligned offset if mem_mode is not QImode. */
2465 off = ((unsigned HOST_WIDE_INT) 1 << i);
2466 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2468 off -= GET_MODE_SIZE (mem_mode);
2469 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2470 if (memory_address_addr_space_p (mem_mode, addr, as))
2471 break;
2474 if (i == 0)
2475 off = 0;
2477 max_offset_list[list_index] = off;
2478 return off;
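/* Illustration (added, not part of the original sources): the loop above
   probes offsets of the form (1 << i) - 1 from the widest bit position
   downwards.  On a hypothetical target whose [base + offset] form takes a
   12-bit signed offset, the first probe that validates is base + 2047, so
   2047 is cached in max_offset_list for this mode and address space and
   returned on all later queries.  */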
2481 /* Check if all small groups should be split. Return true if and
2482 only if:
2484 1) At least one group contains two uses with distinct offsets.
2485 2) No group contains more than two distinct offsets.
2487 Return false otherwise. We want to split such groups because:
2489 1) Small groups don't have much benefit and may interfere with
2490 general candidate selection.
2491 2) The problem size with only small groups is usually small and the
2492 general algorithm can handle it well.
2494 TODO -- Above claim may not hold when auto increment is supported. */
2496 static bool
2497 split_all_small_groups (struct ivopts_data *data)
2499 bool split_p = false;
2500 unsigned int i, n, distinct;
2501 struct iv_use *pre, *use;
2503 n = n_iv_uses (data);
2504 for (i = 0; i < n; i++)
2506 use = iv_use (data, i);
2507 if (!use->next)
2508 continue;
2510 distinct = 1;
2511 gcc_assert (use->type == USE_ADDRESS);
2512 for (pre = use, use = use->next; use; pre = use, use = use->next)
2514 if (pre->addr_offset != use->addr_offset)
2515 distinct++;
2517 if (distinct > 2)
2518 return false;
2520 if (distinct == 2)
2521 split_p = true;
2524 return split_p;
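/* Illustration (added, not part of the original sources): with two address
   groups whose sorted offsets are {0, 4} and {0, 8}, each group has exactly
   two distinct offsets, so true is returned and both groups are split.
   A single group with offsets {0, 4, 8} has three distinct offsets, so
   false is returned and the offset-based regrouping below is used.  */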
2527 /* For each group of address type uses, this function further groups
2528 these uses according to the maximum offset supported by the target's
2529 [base + offset] addressing mode. */
2531 static void
2532 group_address_uses (struct ivopts_data *data)
2534 HOST_WIDE_INT max_offset = -1;
2535 unsigned int i, n, sub_id;
2536 struct iv_use *pre, *use;
2537 unsigned HOST_WIDE_INT addr_offset_first;
2539 /* Reset max offset to split all small groups. */
2540 if (split_all_small_groups (data))
2541 max_offset = 0;
2543 n = n_iv_uses (data);
2544 for (i = 0; i < n; i++)
2546 use = iv_use (data, i);
2547 if (!use->next)
2548 continue;
2550 gcc_assert (use->type == USE_ADDRESS);
2551 if (max_offset != 0)
2552 max_offset = compute_max_addr_offset (use);
2554 while (use)
2556 sub_id = 0;
2557 addr_offset_first = use->addr_offset;
2558 /* Only uses whose offset relative to the first use fits in the offset
2559 part of the addressing mode can be grouped together. */
2560 for (pre = use, use = use->next;
2561 use && (use->addr_offset - addr_offset_first
2562 <= (unsigned HOST_WIDE_INT) max_offset);
2563 pre = use, use = use->next)
2565 use->id = pre->id;
2566 use->sub_id = ++sub_id;
2569 /* Break the list and create new group. */
2570 if (use)
2572 pre->next = NULL;
2573 use->id = n_iv_uses (data);
2574 use->related_cands = BITMAP_ALLOC (NULL);
2575 data->iv_uses.safe_push (use);
2580 if (dump_file && (dump_flags & TDF_DETAILS))
2581 dump_uses (dump_file, data);
2584 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2585 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2586 we are at the top-level of the processed address. */
2588 static tree
2589 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2590 HOST_WIDE_INT *offset)
2592 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2593 enum tree_code code;
2594 tree type, orig_type = TREE_TYPE (expr);
2595 HOST_WIDE_INT off0, off1, st;
2596 tree orig_expr = expr;
2598 STRIP_NOPS (expr);
2600 type = TREE_TYPE (expr);
2601 code = TREE_CODE (expr);
2602 *offset = 0;
2604 switch (code)
2606 case INTEGER_CST:
2607 if (!cst_and_fits_in_hwi (expr)
2608 || integer_zerop (expr))
2609 return orig_expr;
2611 *offset = int_cst_value (expr);
2612 return build_int_cst (orig_type, 0);
2614 case POINTER_PLUS_EXPR:
2615 case PLUS_EXPR:
2616 case MINUS_EXPR:
2617 op0 = TREE_OPERAND (expr, 0);
2618 op1 = TREE_OPERAND (expr, 1);
2620 op0 = strip_offset_1 (op0, false, false, &off0);
2621 op1 = strip_offset_1 (op1, false, false, &off1);
2623 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2624 if (op0 == TREE_OPERAND (expr, 0)
2625 && op1 == TREE_OPERAND (expr, 1))
2626 return orig_expr;
2628 if (integer_zerop (op1))
2629 expr = op0;
2630 else if (integer_zerop (op0))
2632 if (code == MINUS_EXPR)
2633 expr = fold_build1 (NEGATE_EXPR, type, op1);
2634 else
2635 expr = op1;
2637 else
2638 expr = fold_build2 (code, type, op0, op1);
2640 return fold_convert (orig_type, expr);
2642 case MULT_EXPR:
2643 op1 = TREE_OPERAND (expr, 1);
2644 if (!cst_and_fits_in_hwi (op1))
2645 return orig_expr;
2647 op0 = TREE_OPERAND (expr, 0);
2648 op0 = strip_offset_1 (op0, false, false, &off0);
2649 if (op0 == TREE_OPERAND (expr, 0))
2650 return orig_expr;
2652 *offset = off0 * int_cst_value (op1);
2653 if (integer_zerop (op0))
2654 expr = op0;
2655 else
2656 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2658 return fold_convert (orig_type, expr);
2660 case ARRAY_REF:
2661 case ARRAY_RANGE_REF:
2662 if (!inside_addr)
2663 return orig_expr;
2665 step = array_ref_element_size (expr);
2666 if (!cst_and_fits_in_hwi (step))
2667 break;
2669 st = int_cst_value (step);
2670 op1 = TREE_OPERAND (expr, 1);
2671 op1 = strip_offset_1 (op1, false, false, &off1);
2672 *offset = off1 * st;
2674 if (top_compref
2675 && integer_zerop (op1))
2677 /* Strip the component reference completely. */
2678 op0 = TREE_OPERAND (expr, 0);
2679 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2680 *offset += off0;
2681 return op0;
2683 break;
2685 case COMPONENT_REF:
2687 tree field;
2689 if (!inside_addr)
2690 return orig_expr;
2692 tmp = component_ref_field_offset (expr);
2693 field = TREE_OPERAND (expr, 1);
2694 if (top_compref
2695 && cst_and_fits_in_hwi (tmp)
2696 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2698 HOST_WIDE_INT boffset, abs_off;
2700 /* Strip the component reference completely. */
2701 op0 = TREE_OPERAND (expr, 0);
2702 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2703 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2704 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2705 if (boffset < 0)
2706 abs_off = -abs_off;
2708 *offset = off0 + int_cst_value (tmp) + abs_off;
2709 return op0;
2712 break;
2714 case ADDR_EXPR:
2715 op0 = TREE_OPERAND (expr, 0);
2716 op0 = strip_offset_1 (op0, true, true, &off0);
2717 *offset += off0;
2719 if (op0 == TREE_OPERAND (expr, 0))
2720 return orig_expr;
2722 expr = build_fold_addr_expr (op0);
2723 return fold_convert (orig_type, expr);
2725 case MEM_REF:
2726 /* ??? Offset operand? */
2727 inside_addr = false;
2728 break;
2730 default:
2731 return orig_expr;
2734 /* Default handling of expressions for which we want to recurse into
2735 the first operand. */
2736 op0 = TREE_OPERAND (expr, 0);
2737 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2738 *offset += off0;
2740 if (op0 == TREE_OPERAND (expr, 0)
2741 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2742 return orig_expr;
2744 expr = copy_node (expr);
2745 TREE_OPERAND (expr, 0) = op0;
2746 if (op1)
2747 TREE_OPERAND (expr, 1) = op1;
2749 /* Inside an address, we might strip the top-level component references,
2750 thus changing the type of the expression. The handling of ADDR_EXPR
2751 will fix that. */
2752 expr = fold_convert (orig_type, expr);
2754 return expr;
2757 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2759 static tree
2760 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2762 HOST_WIDE_INT off;
2763 tree core = strip_offset_1 (expr, false, false, &off);
2764 *offset = off;
2765 return core;
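/* Illustration (added, not part of the original sources): strip_offset
   splits a constant displacement off an expression.  Assuming an array A
   of 4-byte ints and a pointer p:

     strip_offset (&A[i + 3], &off)   returns &A[i],  off == 12
     strip_offset (p + 20, &off)      returns p,      off == 20
     strip_offset (p, &off)           returns p,      off == 0

   The stripped base is what record_group_use compares when deciding
   whether two address uses belong to the same group.  */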
2768 /* Returns a variant of TYPE that can be used as a base for different uses.
2769 We return an unsigned type with the same precision, which avoids problems
2770 with overflows. */
2772 static tree
2773 generic_type_for (tree type)
2775 if (POINTER_TYPE_P (type))
2776 return unsigned_type_for (type);
2778 if (TYPE_UNSIGNED (type))
2779 return type;
2781 return unsigned_type_for (type);
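/* Illustration (added, not part of the original sources): the generic type
   is always unsigned, so candidate arithmetic wraps instead of triggering
   undefined overflow:

     generic_type_for (int)       == unsigned int
     generic_type_for (unsigned)  == unsigned (returned unchanged)
     generic_type_for (char *)    == an unsigned integer type of pointer
                                     precision  */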
2784 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2785 the bitmap in which we should store them. */
2787 static struct ivopts_data *fd_ivopts_data;
2788 static tree
2789 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2791 bitmap *depends_on = (bitmap *) data;
2792 struct version_info *info;
2794 if (TREE_CODE (*expr_p) != SSA_NAME)
2795 return NULL_TREE;
2796 info = name_info (fd_ivopts_data, *expr_p);
2798 if (!info->inv_id || info->has_nonlin_use)
2799 return NULL_TREE;
2801 if (!*depends_on)
2802 *depends_on = BITMAP_ALLOC (NULL);
2803 bitmap_set_bit (*depends_on, info->inv_id);
2805 return NULL_TREE;
2808 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2809 position to POS. If USE is not NULL, the candidate is set as related to
2810 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2811 replacement of the final value of the iv by a direct computation. */
2813 static struct iv_cand *
2814 add_candidate_1 (struct ivopts_data *data,
2815 tree base, tree step, bool important, enum iv_position pos,
2816 struct iv_use *use, gimple *incremented_at,
2817 struct iv *orig_iv = NULL)
2819 unsigned i;
2820 struct iv_cand *cand = NULL;
2821 tree type, orig_type;
2823 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2824 live, but the ivopts code may replace a real pointer with one
2825 pointing before or after the memory block that is then adjusted
2826 into the memory block during the loop. FIXME: It would likely be
2827 better to actually force the pointer live and still use ivopts;
2828 for example, it would be enough to write the pointer into memory
2829 and keep it there until after the loop. */
2830 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2831 return NULL;
2833 /* For non-original variables, make sure their values are computed in a type
2834 that does not invoke undefined behavior on overflows (since in general,
2835 we cannot prove that these induction variables are non-wrapping). */
2836 if (pos != IP_ORIGINAL)
2838 orig_type = TREE_TYPE (base);
2839 type = generic_type_for (orig_type);
2840 if (type != orig_type)
2842 base = fold_convert (type, base);
2843 step = fold_convert (type, step);
2847 for (i = 0; i < n_iv_cands (data); i++)
2849 cand = iv_cand (data, i);
2851 if (cand->pos != pos)
2852 continue;
2854 if (cand->incremented_at != incremented_at
2855 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2856 && cand->ainc_use != use))
2857 continue;
2859 if (!cand->iv)
2861 if (!base && !step)
2862 break;
2864 continue;
2867 if (!base && !step)
2868 continue;
2870 if (operand_equal_p (base, cand->iv->base, 0)
2871 && operand_equal_p (step, cand->iv->step, 0)
2872 && (TYPE_PRECISION (TREE_TYPE (base))
2873 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2874 break;
2877 if (i == n_iv_cands (data))
2879 cand = XCNEW (struct iv_cand);
2880 cand->id = i;
2882 if (!base && !step)
2883 cand->iv = NULL;
2884 else
2885 cand->iv = alloc_iv (data, base, step);
2887 cand->pos = pos;
2888 if (pos != IP_ORIGINAL && cand->iv)
2890 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2891 cand->var_after = cand->var_before;
2893 cand->important = important;
2894 cand->incremented_at = incremented_at;
2895 data->iv_candidates.safe_push (cand);
2897 if (step
2898 && TREE_CODE (step) != INTEGER_CST)
2900 fd_ivopts_data = data;
2901 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2904 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2905 cand->ainc_use = use;
2906 else
2907 cand->ainc_use = NULL;
2909 cand->orig_iv = orig_iv;
2910 if (dump_file && (dump_flags & TDF_DETAILS))
2911 dump_cand (dump_file, cand);
2914 if (important && !cand->important)
2916 cand->important = true;
2917 if (dump_file && (dump_flags & TDF_DETAILS))
2918 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2921 if (use)
2923 bitmap_set_bit (use->related_cands, i);
2924 if (dump_file && (dump_flags & TDF_DETAILS))
2925 fprintf (dump_file, "Candidate %d is related to use %d\n",
2926 cand->id, use->id);
2929 return cand;
2932 /* Returns true if incrementing the induction variable at the end of the LOOP
2933 is allowed.
2935 The purpose is to avoid splitting the latch edge with a biv increment,
2936 thus creating a jump, possibly confusing other optimization passes and
2937 leaving less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2938 is not available (so we do not have a better alternative), or if the latch
2939 edge is already nonempty. */
2941 static bool
2942 allow_ip_end_pos_p (struct loop *loop)
2944 if (!ip_normal_pos (loop))
2945 return true;
2947 if (!empty_block_p (ip_end_pos (loop)))
2948 return true;
2950 return false;
2953 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2954 Important field is set to IMPORTANT. */
2956 static void
2957 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2958 bool important, struct iv_use *use)
2960 basic_block use_bb = gimple_bb (use->stmt);
2961 machine_mode mem_mode;
2962 unsigned HOST_WIDE_INT cstepi;
2964 /* If we insert the increment in any position other than the standard
2965 ones, we must ensure that it is incremented once per iteration.
2966 It must not be in an inner nested loop, or on one side of an if
2967 statement. */
2968 if (use_bb->loop_father != data->current_loop
2969 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2970 || stmt_could_throw_p (use->stmt)
2971 || !cst_and_fits_in_hwi (step))
2972 return;
2974 cstepi = int_cst_value (step);
2976 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2977 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2978 || USE_STORE_PRE_INCREMENT (mem_mode))
2979 && GET_MODE_SIZE (mem_mode) == cstepi)
2980 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2981 || USE_STORE_PRE_DECREMENT (mem_mode))
2982 && GET_MODE_SIZE (mem_mode) == -cstepi))
2984 enum tree_code code = MINUS_EXPR;
2985 tree new_base;
2986 tree new_step = step;
2988 if (POINTER_TYPE_P (TREE_TYPE (base)))
2990 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2991 code = POINTER_PLUS_EXPR;
2993 else
2994 new_step = fold_convert (TREE_TYPE (base), new_step);
2995 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2996 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2997 use->stmt);
2999 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3000 || USE_STORE_POST_INCREMENT (mem_mode))
3001 && GET_MODE_SIZE (mem_mode) == cstepi)
3002 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3003 || USE_STORE_POST_DECREMENT (mem_mode))
3004 && GET_MODE_SIZE (mem_mode) == -cstepi))
3006 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3007 use->stmt);
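/* Illustration (added, not part of the original sources): on a target with
   post-increment addressing, e.g. an ARM-style "ldr r0, [r1], #4", a
   4-byte load whose address iv steps by exactly GET_MODE_SIZE (mem_mode),
   as in

     for (i = 0; i < n; i++)
       sum += *p++;

   receives an IP_AFTER_USE candidate above, allowing the pointer increment
   to be folded into the memory access itself.  */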
3011 /* Adds a candidate BASE + STEP * i. The important field is set to
3012 IMPORTANT. If USE is not NULL, the candidate is set as related to
3013 it. The candidate computation is scheduled before the exit condition
3014 and at the end of the loop. */
3016 static void
3017 add_candidate (struct ivopts_data *data,
3018 tree base, tree step, bool important, struct iv_use *use,
3019 struct iv *orig_iv = NULL)
3021 gcc_assert (use == NULL || use->sub_id == 0);
3023 if (ip_normal_pos (data->current_loop))
3024 add_candidate_1 (data, base, step, important,
3025 IP_NORMAL, use, NULL, orig_iv);
3026 if (ip_end_pos (data->current_loop)
3027 && allow_ip_end_pos_p (data->current_loop))
3028 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3031 /* Adds standard iv candidates. */
3033 static void
3034 add_standard_iv_candidates (struct ivopts_data *data)
3036 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3038 /* The same for a double-integer type if it is still fast enough. */
3039 if (TYPE_PRECISION
3040 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3041 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3042 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3043 build_int_cst (long_integer_type_node, 1), true, NULL);
3045 /* The same for a double-integer type if it is still fast enough. */
3046 if (TYPE_PRECISION
3047 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3048 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3049 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3050 build_int_cst (long_long_integer_type_node, 1), true, NULL);
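/* Illustration (added, not part of the original sources): on a typical
   LP64 target this adds the canonical counters {0, +, 1} in int (32-bit)
   and long (64-bit); the long long variant is skipped there because it
   has the same precision as long, so the second condition above fails.  */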
3054 /* Adds candidates based on the old induction variable IV. */
3056 static void
3057 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3059 gimple *phi;
3060 tree def;
3061 struct iv_cand *cand;
3063 /* Check if this biv is used in address type use. */
3064 if (iv->no_overflow && iv->have_address_use
3065 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3066 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3068 tree base = fold_convert (sizetype, iv->base);
3069 tree step = fold_convert (sizetype, iv->step);
3071 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3072 add_candidate (data, base, step, true, NULL, iv);
3073 /* Add iv cand of the original type only if it has nonlinear use. */
3074 if (iv->have_use_for)
3075 add_candidate (data, iv->base, iv->step, true, NULL);
3077 else
3078 add_candidate (data, iv->base, iv->step, true, NULL);
3080 /* The same, but with initial value zero. */
3081 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3082 add_candidate (data, size_int (0), iv->step, true, NULL);
3083 else
3084 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3085 iv->step, true, NULL);
3087 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3088 if (gimple_code (phi) == GIMPLE_PHI)
3090 /* Additionally record the possibility of leaving the original iv
3091 untouched. */
3092 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3093 /* Don't add candidate if it's from another PHI node because
3094 it's an affine iv appearing in the form of PEELED_CHREC. */
3095 phi = SSA_NAME_DEF_STMT (def);
3096 if (gimple_code (phi) != GIMPLE_PHI)
3098 cand = add_candidate_1 (data,
3099 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3100 SSA_NAME_DEF_STMT (def));
3101 if (cand)
3103 cand->var_before = iv->ssa_name;
3104 cand->var_after = def;
3107 else
3108 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3112 /* Adds candidates based on the old induction variables. */
3114 static void
3115 add_iv_candidate_for_bivs (struct ivopts_data *data)
3117 unsigned i;
3118 struct iv *iv;
3119 bitmap_iterator bi;
3121 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3123 iv = ver_info (data, i)->iv;
3124 if (iv && iv->biv_p && !integer_zerop (iv->step))
3125 add_iv_candidate_for_biv (data, iv);
3129 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3131 static void
3132 record_common_cand (struct ivopts_data *data, tree base,
3133 tree step, struct iv_use *use)
3135 struct iv_common_cand ent;
3136 struct iv_common_cand **slot;
3138 gcc_assert (use != NULL);
3140 ent.base = base;
3141 ent.step = step;
3142 ent.hash = iterative_hash_expr (base, 0);
3143 ent.hash = iterative_hash_expr (step, ent.hash);
3145 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3146 if (*slot == NULL)
3148 *slot = new iv_common_cand ();
3149 (*slot)->base = base;
3150 (*slot)->step = step;
3151 (*slot)->uses.create (8);
3152 (*slot)->hash = ent.hash;
3153 data->iv_common_cands.safe_push ((*slot));
3155 (*slot)->uses.safe_push (use);
3156 return;
3159 /* Comparison function used to sort common candidates. */
3161 static int
3162 common_cand_cmp (const void *p1, const void *p2)
3164 unsigned n1, n2;
3165 const struct iv_common_cand *const *const ccand1
3166 = (const struct iv_common_cand *const *)p1;
3167 const struct iv_common_cand *const *const ccand2
3168 = (const struct iv_common_cand *const *)p2;
3170 n1 = (*ccand1)->uses.length ();
3171 n2 = (*ccand2)->uses.length ();
3172 return n2 - n1;
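/* Illustration (added, not part of the original sources): returning
   n2 - n1 sorts in decreasing order of use count, e.g. use-list lengths
   {1, 3, 2} end up ordered {3, 2, 1}; this lets the consumer below stop
   scanning at the first common candidate with a single use.  */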
3175 /* Adds IV candidates based on the common candidates recorded. */
3177 static void
3178 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3180 unsigned i, j;
3181 struct iv_cand *cand_1, *cand_2;
3183 data->iv_common_cands.qsort (common_cand_cmp);
3184 for (i = 0; i < data->iv_common_cands.length (); i++)
3186 struct iv_common_cand *ptr = data->iv_common_cands[i];
3188 /* Only add IV candidate if it's derived from multiple uses. */
3189 if (ptr->uses.length () <= 1)
3190 break;
3192 cand_1 = NULL;
3193 cand_2 = NULL;
3194 if (ip_normal_pos (data->current_loop))
3195 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3196 false, IP_NORMAL, NULL, NULL);
3198 if (ip_end_pos (data->current_loop)
3199 && allow_ip_end_pos_p (data->current_loop))
3200 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3201 false, IP_END, NULL, NULL);
3203 /* Bind deriving uses and the new candidates. */
3204 for (j = 0; j < ptr->uses.length (); j++)
3206 struct iv_use *use = ptr->uses[j];
3207 if (cand_1)
3208 bitmap_set_bit (use->related_cands, cand_1->id);
3209 if (cand_2)
3210 bitmap_set_bit (use->related_cands, cand_2->id);
3214 /* Release data since it is useless from this point. */
3215 data->iv_common_cand_tab->empty ();
3216 data->iv_common_cands.truncate (0);
3219 /* Adds candidates based on the value of USE's iv. */
3221 static void
3222 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3224 unsigned HOST_WIDE_INT offset;
3225 tree base;
3226 tree basetype;
3227 struct iv *iv = use->iv;
3229 add_candidate (data, iv->base, iv->step, false, use);
3231 /* Record common candidate for use in case it can be shared by others. */
3232 record_common_cand (data, iv->base, iv->step, use);
3234 /* Record common candidate with initial value zero. */
3235 basetype = TREE_TYPE (iv->base);
3236 if (POINTER_TYPE_P (basetype))
3237 basetype = sizetype;
3238 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3240 /* Record a common candidate with the constant offset stripped from the
3241 base. As with the use itself, we also add a candidate directly for it. */
3242 base = strip_offset (iv->base, &offset);
3243 if (offset || base != iv->base)
3245 record_common_cand (data, base, iv->step, use);
3246 add_candidate (data, base, iv->step, false, use);
3249 /* Record common candidate with base_object removed in base. */
3250 if (iv->base_object != NULL)
3252 unsigned i;
3253 aff_tree aff_base;
3254 tree step, base_object = iv->base_object;
3256 base = iv->base;
3257 step = iv->step;
3258 STRIP_NOPS (base);
3259 STRIP_NOPS (step);
3260 STRIP_NOPS (base_object);
3261 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3262 for (i = 0; i < aff_base.n; i++)
3264 if (aff_base.elts[i].coef != 1)
3265 continue;
3267 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3268 break;
3270 if (i < aff_base.n)
3272 aff_combination_remove_elt (&aff_base, i);
3273 base = aff_combination_to_tree (&aff_base);
3274 basetype = TREE_TYPE (base);
3275 if (POINTER_TYPE_P (basetype))
3276 basetype = sizetype;
3278 step = fold_convert (basetype, step);
3279 record_common_cand (data, base, step, use);
3280 /* Also record common candidate with offset stripped. */
3281 base = strip_offset (base, &offset);
3282 if (offset)
3283 record_common_cand (data, base, step, use);
3287 /* Finally, add auto-increment candidates. Make such variables
3288 important, since other iv uses with the same base object may be based
3289 on them. */
3290 if (use != NULL && use->type == USE_ADDRESS)
3291 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3294 /* Adds candidates based on the uses. */
3296 static void
3297 add_iv_candidate_for_uses (struct ivopts_data *data)
3299 unsigned i;
3301 for (i = 0; i < n_iv_uses (data); i++)
3303 struct iv_use *use = iv_use (data, i);
3305 if (!use)
3306 continue;
3308 switch (use->type)
3310 case USE_NONLINEAR_EXPR:
3311 case USE_COMPARE:
3312 case USE_ADDRESS:
3313 /* Just add the ivs based on the value of the iv used here. */
3314 add_iv_candidate_for_use (data, use);
3315 break;
3317 default:
3318 gcc_unreachable ();
3321 add_iv_candidate_derived_from_uses (data);
3324 /* Record important candidates and add them to related_cands bitmaps. */
3326 static void
3327 record_important_candidates (struct ivopts_data *data)
3329 unsigned i;
3330 struct iv_use *use;
3332 for (i = 0; i < n_iv_cands (data); i++)
3334 struct iv_cand *cand = iv_cand (data, i);
3336 if (cand->important)
3337 bitmap_set_bit (data->important_candidates, i);
3340 data->consider_all_candidates = (n_iv_cands (data)
3341 <= CONSIDER_ALL_CANDIDATES_BOUND);
3343 /* Add important candidates to uses' related_cands bitmaps. */
3344 for (i = 0; i < n_iv_uses (data); i++)
3346 use = iv_use (data, i);
3347 bitmap_ior_into (use->related_cands, data->important_candidates);
3351 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3352 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3353 we allocate a simple list for every use. */
3355 static void
3356 alloc_use_cost_map (struct ivopts_data *data)
3358 unsigned i, size, s;
3360 for (i = 0; i < n_iv_uses (data); i++)
3362 struct iv_use *use = iv_use (data, i);
3364 if (data->consider_all_candidates)
3365 size = n_iv_cands (data);
3366 else
3368 s = bitmap_count_bits (use->related_cands);
3370 /* Round up to a power of two, so that taking the modulus by it is fast. */
3371 size = s ? (1 << ceil_log2 (s)) : 1;
3374 use->n_map_members = size;
3375 use->cost_map = XCNEWVEC (struct cost_pair, size);
3379 /* Returns a description of the computation cost of an expression whose
3380 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3382 static comp_cost
3383 new_cost (unsigned runtime, unsigned complexity)
3385 comp_cost cost;
3387 cost.cost = runtime;
3388 cost.complexity = complexity;
3390 return cost;
3393 /* Returns true if COST is infinite. */
3395 static bool
3396 infinite_cost_p (comp_cost cost)
3398 return cost.cost == INFTY;
3401 /* Adds costs COST1 and COST2. */
3403 static comp_cost
3404 add_costs (comp_cost cost1, comp_cost cost2)
3406 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3407 return infinite_cost;
3409 cost1.cost += cost2.cost;
3410 cost1.complexity += cost2.complexity;
3412 return cost1;
3414 /* Subtracts costs COST1 and COST2. */
3416 static comp_cost
3417 sub_costs (comp_cost cost1, comp_cost cost2)
3419 cost1.cost -= cost2.cost;
3420 cost1.complexity -= cost2.complexity;
3422 return cost1;
3425 /* Returns a negative number if COST1 < COST2, a positive number if
3426 COST1 > COST2, and 0 if COST1 = COST2. */
3428 static int
3429 compare_costs (comp_cost cost1, comp_cost cost2)
3431 if (cost1.cost == cost2.cost)
3432 return cost1.complexity - cost2.complexity;
3434 return cost1.cost - cost2.cost;
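/* Illustration (added, not part of the original sources): costs compare
   first by runtime cost and only then by complexity:

     compare_costs (new_cost (4, 2), new_cost (5, 0)) < 0   (4 < 5)
     compare_costs (new_cost (4, 2), new_cost (4, 1)) > 0   (tie on cost,
                                                             2 > 1)

   add_costs saturates at infinite_cost, so a pair involving an impossible
   (USE, CAND) combination stays impossible.  */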
3437 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3438 depends on invariants DEPENDS_ON, that the value used in expressing it
3439 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3441 static void
3442 set_use_iv_cost (struct ivopts_data *data,
3443 struct iv_use *use, struct iv_cand *cand,
3444 comp_cost cost, bitmap depends_on, tree value,
3445 enum tree_code comp, int inv_expr_id)
3447 unsigned i, s;
3449 if (infinite_cost_p (cost))
3451 BITMAP_FREE (depends_on);
3452 return;
3455 if (data->consider_all_candidates)
3457 use->cost_map[cand->id].cand = cand;
3458 use->cost_map[cand->id].cost = cost;
3459 use->cost_map[cand->id].depends_on = depends_on;
3460 use->cost_map[cand->id].value = value;
3461 use->cost_map[cand->id].comp = comp;
3462 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3463 return;
3466 /* n_map_members is a power of two, so this computes modulo. */
3467 s = cand->id & (use->n_map_members - 1);
3468 for (i = s; i < use->n_map_members; i++)
3469 if (!use->cost_map[i].cand)
3470 goto found;
3471 for (i = 0; i < s; i++)
3472 if (!use->cost_map[i].cand)
3473 goto found;
3475 gcc_unreachable ();
3477 found:
3478 use->cost_map[i].cand = cand;
3479 use->cost_map[i].cost = cost;
3480 use->cost_map[i].depends_on = depends_on;
3481 use->cost_map[i].value = value;
3482 use->cost_map[i].comp = comp;
3483 use->cost_map[i].inv_expr_id = inv_expr_id;
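/* Illustration (added, not part of the original sources): the cost map is
   a small open-addressing hash table keyed by candidate id.  With
   n_map_members == 8 and cand->id == 13:

     s = 13 & (8 - 1) = 5

   so the probe sequence is slots 5, 6, 7, then 0 .. 4; the first empty
   slot receives the pair.  get_use_iv_cost below walks the same sequence,
   stopping at the candidate or at the first empty slot.  */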
3486 /* Gets cost of (USE, CANDIDATE) pair. */
3488 static struct cost_pair *
3489 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3490 struct iv_cand *cand)
3492 unsigned i, s;
3493 struct cost_pair *ret;
3495 if (!cand)
3496 return NULL;
3498 if (data->consider_all_candidates)
3500 ret = use->cost_map + cand->id;
3501 if (!ret->cand)
3502 return NULL;
3504 return ret;
3507 /* n_map_members is a power of two, so this computes modulo. */
3508 s = cand->id & (use->n_map_members - 1);
3509 for (i = s; i < use->n_map_members; i++)
3510 if (use->cost_map[i].cand == cand)
3511 return use->cost_map + i;
3512 else if (use->cost_map[i].cand == NULL)
3513 return NULL;
3514 for (i = 0; i < s; i++)
3515 if (use->cost_map[i].cand == cand)
3516 return use->cost_map + i;
3517 else if (use->cost_map[i].cand == NULL)
3518 return NULL;
3520 return NULL;
3523 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3524 static rtx
3525 produce_memory_decl_rtl (tree obj, int *regno)
3527 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3528 machine_mode address_mode = targetm.addr_space.address_mode (as);
3529 rtx x;
3531 gcc_assert (obj);
3532 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3534 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3535 x = gen_rtx_SYMBOL_REF (address_mode, name);
3536 SET_SYMBOL_REF_DECL (x, obj);
3537 x = gen_rtx_MEM (DECL_MODE (obj), x);
3538 set_mem_addr_space (x, as);
3539 targetm.encode_section_info (obj, x, true);
3541 else
3543 x = gen_raw_REG (address_mode, (*regno)++);
3544 x = gen_rtx_MEM (DECL_MODE (obj), x);
3545 set_mem_addr_space (x, as);
3548 return x;
3551 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3552 walk_tree. DATA contains the actual fake register number. */
3554 static tree
3555 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3557 tree obj = NULL_TREE;
3558 rtx x = NULL_RTX;
3559 int *regno = (int *) data;
3561 switch (TREE_CODE (*expr_p))
3563 case ADDR_EXPR:
3564 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3565 handled_component_p (*expr_p);
3566 expr_p = &TREE_OPERAND (*expr_p, 0))
3567 continue;
3568 obj = *expr_p;
3569 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3570 x = produce_memory_decl_rtl (obj, regno);
3571 break;
3573 case SSA_NAME:
3574 *ws = 0;
3575 obj = SSA_NAME_VAR (*expr_p);
3576 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3577 if (!obj)
3578 return NULL_TREE;
3579 if (!DECL_RTL_SET_P (obj))
3580 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3581 break;
3583 case VAR_DECL:
3584 case PARM_DECL:
3585 case RESULT_DECL:
3586 *ws = 0;
3587 obj = *expr_p;
3589 if (DECL_RTL_SET_P (obj))
3590 break;
3592 if (DECL_MODE (obj) == BLKmode)
3593 x = produce_memory_decl_rtl (obj, regno);
3594 else
3595 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3597 break;
3599 default:
3600 break;
3603 if (x)
3605 decl_rtl_to_reset.safe_push (obj);
3606 SET_DECL_RTL (obj, x);
3609 return NULL_TREE;
3612 /* Determines cost of the computation of EXPR. */
3614 static unsigned
3615 computation_cost (tree expr, bool speed)
3617 rtx_insn *seq;
3618 rtx rslt;
3619 tree type = TREE_TYPE (expr);
3620 unsigned cost;
3621 /* Avoid using hard regs in ways which may be unsupported. */
3622 int regno = LAST_VIRTUAL_REGISTER + 1;
3623 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3624 enum node_frequency real_frequency = node->frequency;
3626 node->frequency = NODE_FREQUENCY_NORMAL;
3627 crtl->maybe_hot_insn_p = speed;
3628 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3629 start_sequence ();
3630 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3631 seq = get_insns ();
3632 end_sequence ();
3633 default_rtl_profile ();
3634 node->frequency = real_frequency;
3636 cost = seq_cost (seq, speed);
3637 if (MEM_P (rslt))
3638 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3639 TYPE_ADDR_SPACE (type), speed);
3640 else if (!REG_P (rslt))
3641 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3643 return cost;
3646 /* Returns variable containing the value of candidate CAND at statement AT. */
3648 static tree
3649 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3651 if (stmt_after_increment (loop, cand, stmt))
3652 return cand->var_after;
3653 else
3654 return cand->var_before;
3657 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3658 same precision that is at least as wide as the precision of TYPE, stores
3659 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3660 type of A and B. */
3662 static tree
3663 determine_common_wider_type (tree *a, tree *b)
3665 tree wider_type = NULL;
3666 tree suba, subb;
3667 tree atype = TREE_TYPE (*a);
3669 if (CONVERT_EXPR_P (*a))
3671 suba = TREE_OPERAND (*a, 0);
3672 wider_type = TREE_TYPE (suba);
3673 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3674 return atype;
3676 else
3677 return atype;
3679 if (CONVERT_EXPR_P (*b))
3681 subb = TREE_OPERAND (*b, 0);
3682 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3683 return atype;
3685 else
3686 return atype;
3688 *a = suba;
3689 *b = subb;
3690 return wider_type;
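/* Illustration (added, not part of the original sources): if *A is
   (unsigned int) al and *B is (unsigned int) bl, with al and bl of type
   long, both operands are replaced by their inner long expressions and
   long is returned, so the difference below is folded in the wider type
   and only the final result is truncated.  */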
3693 /* Determines the expression by which USE is expressed from induction variable
3694 CAND at statement AT in LOOP. The expression is stored in a decomposed
3695 form into AFF. Returns false if USE cannot be expressed using CAND. */
3697 static bool
3698 get_computation_aff (struct loop *loop,
3699 struct iv_use *use, struct iv_cand *cand, gimple *at,
3700 struct aff_tree *aff)
3702 tree ubase = use->iv->base;
3703 tree ustep = use->iv->step;
3704 tree cbase = cand->iv->base;
3705 tree cstep = cand->iv->step, cstep_common;
3706 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3707 tree common_type, var;
3708 tree uutype;
3709 aff_tree cbase_aff, var_aff;
3710 widest_int rat;
3712 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3714 /* We do not have enough precision to express the values of the use. */
3715 return false;
3718 var = var_at_stmt (loop, cand, at);
3719 uutype = unsigned_type_for (utype);
3721 /* If the conversion is not a no-op, perform it. */
3722 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3724 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3725 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3727 tree inner_base, inner_step, inner_type;
3728 inner_base = TREE_OPERAND (cbase, 0);
3729 if (CONVERT_EXPR_P (cstep))
3730 inner_step = TREE_OPERAND (cstep, 0);
3731 else
3732 inner_step = cstep;
3734 inner_type = TREE_TYPE (inner_base);
3735 /* If the candidate is added from a biv whose type is smaller than
3736 ctype, we know both the candidate and the biv won't overflow.
3737 In this case, it's safe to skip the conversion in the candidate.
3738 As an example, (unsigned short)((unsigned long)A) equals
3739 (unsigned short)A, if A has a type no larger than short. */
3740 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3742 cbase = inner_base;
3743 cstep = inner_step;
3746 cstep = fold_convert (uutype, cstep);
3747 cbase = fold_convert (uutype, cbase);
3748 var = fold_convert (uutype, var);
3751 /* Ratio is 1 when computing the value of biv cand by itself.
3752 We can't rely on constant_multiple_of in this case because the
3753 use is created after the original biv is selected. The call
3754 could fail because of inconsistent fold behavior. See PR68021
3755 for more information. */
3756 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3758 gcc_assert (is_gimple_assign (use->stmt));
3759 gcc_assert (use->iv->ssa_name == cand->var_after);
3760 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3761 rat = 1;
3763 else if (!constant_multiple_of (ustep, cstep, &rat))
3764 return false;
3766 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3767 type, we achieve better folding by computing their difference in this
3768 wider type, and casting the result to UUTYPE. We do not need to worry about
3769 overflows, as all the arithmetic will in the end be performed in UUTYPE
3770 anyway. */
3771 common_type = determine_common_wider_type (&ubase, &cbase);
3773 /* use = ubase - ratio * cbase + ratio * var. */
3774 tree_to_aff_combination (ubase, common_type, aff);
3775 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3776 tree_to_aff_combination (var, uutype, &var_aff);
3778 /* We need to shift the value if we are after the increment. */
3779 if (stmt_after_increment (loop, cand, at))
3781 aff_tree cstep_aff;
3783 if (common_type != uutype)
3784 cstep_common = fold_convert (common_type, cstep);
3785 else
3786 cstep_common = cstep;
3788 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3789 aff_combination_add (&cbase_aff, &cstep_aff);
3792 aff_combination_scale (&cbase_aff, -rat);
3793 aff_combination_add (aff, &cbase_aff);
3794 if (common_type != uutype)
3795 aff_combination_convert (aff, uutype);
3797 aff_combination_scale (&var_aff, rat);
3798 aff_combination_add (aff, &var_aff);
3800 return true;
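/* Illustration (added, not part of the original sources): for a use with
   {ubase, +, ustep} = {&A[0] + 40, +, 4} expressed from a candidate with
   {cbase, +, cstep} = {0, +, 1}, constant_multiple_of yields rat == 4, and
   the combination computed above is

     use = ubase - rat * cbase + rat * var = &A[0] + 40 + 4 * var

   i.e. the use's value is recovered from the candidate's variable VAR by
   a single affine expression.  */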
3803 /* Return the type of USE. */
3805 static tree
3806 get_use_type (struct iv_use *use)
3808 tree base_type = TREE_TYPE (use->iv->base);
3809 tree type;
3811 if (use->type == USE_ADDRESS)
3813 /* The base_type may be a void pointer. Create a pointer type based on
3814 the mem_ref instead. */
3815 type = build_pointer_type (TREE_TYPE (*use->op_p));
3816 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3817 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3819 else
3820 type = base_type;
3822 return type;
3825 /* Determines the expression by which USE is expressed from induction variable
3826 CAND at statement AT in LOOP. The computation is unshared. */
3828 static tree
3829 get_computation_at (struct loop *loop,
3830 struct iv_use *use, struct iv_cand *cand, gimple *at)
3832 aff_tree aff;
3833 tree type = get_use_type (use);
3835 if (!get_computation_aff (loop, use, cand, at, &aff))
3836 return NULL_TREE;
3837 unshare_aff_combination (&aff);
3838 return fold_convert (type, aff_combination_to_tree (&aff));
3841 /* Determines the expression by which USE is expressed from induction variable
3842 CAND in LOOP. The computation is unshared. */
3844 static tree
3845 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3847 return get_computation_at (loop, use, cand, use->stmt);
3850 /* Adjust the cost COST for being in loop setup rather than loop body.
3851 If we're optimizing for space, the loop setup overhead is constant;
3852 if we're optimizing for speed, amortize it over the average number of iterations. */
3853 static unsigned
3854 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3856 if (cost == INFTY)
3857 return cost;
3858 else if (optimize_loop_for_speed_p (data->current_loop))
3859 return cost / avg_loop_niter (data->current_loop);
3860 else
3861 return cost;
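/* Illustration (added, not part of the original sources): with a setup
   cost of 10 and avg_loop_niter == 5, optimizing for speed charges only
   10 / 5 == 2, since the one-off setup is amortized over the iterations;
   optimizing for size keeps the full constant 10.  */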
3864 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3865 validity for a memory reference accessing memory of mode MODE in
3866 address space AS. */
3869 bool
3870 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3871 addr_space_t as)
3873 #define MAX_RATIO 128
3874 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3875 static vec<sbitmap> valid_mult_list;
3876 sbitmap valid_mult;
3878 if (data_index >= valid_mult_list.length ())
3879 valid_mult_list.safe_grow_cleared (data_index + 1);
3881 valid_mult = valid_mult_list[data_index];
3882 if (!valid_mult)
3884 machine_mode address_mode = targetm.addr_space.address_mode (as);
3885 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3886 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3887 rtx addr, scaled;
3888 HOST_WIDE_INT i;
3890 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3891 bitmap_clear (valid_mult);
3892 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3893 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3894 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3896 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3897 if (memory_address_addr_space_p (mode, addr, as)
3898 || memory_address_addr_space_p (mode, scaled, as))
3899 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3904 fprintf (dump_file, " allowed multipliers:");
3905 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3906 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3907 fprintf (dump_file, " %d", (int) i);
3908 fprintf (dump_file, "\n");
3909 fprintf (dump_file, "\n");
3912 valid_mult_list[data_index] = valid_mult;
3915 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3916 return false;
3918 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
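/* Illustration (added, not part of the original sources): on x86-64 the
   probing above typically accepts ratios 1, 2, 4 and 8 (the scales a SIB
   byte can encode), so an access like A[4 * i] keeps the multiplication
   inside the address, while A[12 * i] needs a separate multiply whose
   cost is accounted for in get_address_cost below.  */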
3921 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3922 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3923 variable is omitted. Compute the cost for a memory reference that accesses
3924 a memory location of mode MEM_MODE in address space AS.
3926 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3927 size of MEM_MODE / RATIO) is available. To make this determination, we
3928 look at the size of the increment to be made, which is given in CSTEP.
3929 CSTEP may be zero if the step is unknown.
3930 STMT_AFTER_INC is true iff the statement we're looking at is after the
3931 increment of the original biv.
3933 TODO -- there must be some better way. This is all quite crude. */
3935 enum ainc_type
3937 AINC_PRE_INC, /* Pre increment. */
3938 AINC_PRE_DEC, /* Pre decrement. */
3939 AINC_POST_INC, /* Post increment. */
3940 AINC_POST_DEC, /* Post decrement. */
3941 AINC_NONE /* Also the number of auto increment types. */
3944 struct address_cost_data
3946 HOST_WIDE_INT min_offset, max_offset;
3947 unsigned costs[2][2][2][2];
3948 unsigned ainc_costs[AINC_NONE];
3952 static comp_cost
3953 get_address_cost (bool symbol_present, bool var_present,
3954 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3955 HOST_WIDE_INT cstep, machine_mode mem_mode,
3956 addr_space_t as, bool speed,
3957 bool stmt_after_inc, bool *may_autoinc)
3959 machine_mode address_mode = targetm.addr_space.address_mode (as);
3960 static vec<address_cost_data *> address_cost_data_list;
3961 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3962 address_cost_data *data;
3963 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3964 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3965 unsigned cost, acost, complexity;
3966 enum ainc_type autoinc_type;
3967 bool offset_p, ratio_p, autoinc;
3968 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3969 unsigned HOST_WIDE_INT mask;
3970 unsigned bits;
3972 if (data_index >= address_cost_data_list.length ())
3973 address_cost_data_list.safe_grow_cleared (data_index + 1);
3975 data = address_cost_data_list[data_index];
3976 if (!data)
3978 HOST_WIDE_INT i;
3979 HOST_WIDE_INT rat, off = 0;
3980 int old_cse_not_expected, width;
3981 unsigned sym_p, var_p, off_p, rat_p, add_c;
3982 rtx_insn *seq;
3983 rtx addr, base;
3984 rtx reg0, reg1;
3986 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3988 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3990 width = GET_MODE_BITSIZE (address_mode) - 1;
3991 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3992 width = HOST_BITS_PER_WIDE_INT - 1;
3993 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3995 for (i = width; i >= 0; i--)
3997 off = -((unsigned HOST_WIDE_INT) 1 << i);
3998 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3999 if (memory_address_addr_space_p (mem_mode, addr, as))
4000 break;
4002 data->min_offset = (i == -1? 0 : off);
4004 for (i = width; i >= 0; i--)
4006 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4007 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4008 if (memory_address_addr_space_p (mem_mode, addr, as))
4009 break;
4010 /* For some strict-alignment targets, the offset must be naturally
4011 aligned. Try an aligned offset if mem_mode is not QImode. */
4012 off = mem_mode != QImode
4013 ? ((unsigned HOST_WIDE_INT) 1 << i)
4014 - GET_MODE_SIZE (mem_mode)
4015 : 0;
4016 if (off > 0)
4018 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4019 if (memory_address_addr_space_p (mem_mode, addr, as))
4020 break;
4023 if (i == -1)
4024 off = 0;
4025 data->max_offset = off;
4027 if (dump_file && (dump_flags & TDF_DETAILS))
4029 fprintf (dump_file, "get_address_cost:\n");
4030 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4031 GET_MODE_NAME (mem_mode),
4032 data->min_offset);
4033 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4034 GET_MODE_NAME (mem_mode),
4035 data->max_offset);
4038 rat = 1;
4039 for (i = 2; i <= MAX_RATIO; i++)
4040 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4042 rat = i;
4043 break;
4046 /* Compute the cost of various addressing modes. */
4047 acost = 0;
4048 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4049 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4051 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4052 || USE_STORE_PRE_DECREMENT (mem_mode))
4054 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4055 has_predec[mem_mode]
4056 = memory_address_addr_space_p (mem_mode, addr, as);
4058 if (has_predec[mem_mode])
4059 data->ainc_costs[AINC_PRE_DEC]
4060 = address_cost (addr, mem_mode, as, speed);
4062 if (USE_LOAD_POST_DECREMENT (mem_mode)
4063 || USE_STORE_POST_DECREMENT (mem_mode))
4065 addr = gen_rtx_POST_DEC (address_mode, reg0);
4066 has_postdec[mem_mode]
4067 = memory_address_addr_space_p (mem_mode, addr, as);
4069 if (has_postdec[mem_mode])
4070 data->ainc_costs[AINC_POST_DEC]
4071 = address_cost (addr, mem_mode, as, speed);
4073 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4074 || USE_STORE_PRE_DECREMENT (mem_mode))
4076 addr = gen_rtx_PRE_INC (address_mode, reg0);
4077 has_preinc[mem_mode]
4078 = memory_address_addr_space_p (mem_mode, addr, as);
4080 if (has_preinc[mem_mode])
4081 data->ainc_costs[AINC_PRE_INC]
4082 = address_cost (addr, mem_mode, as, speed);
4084 if (USE_LOAD_POST_INCREMENT (mem_mode)
4085 || USE_STORE_POST_INCREMENT (mem_mode))
4087 addr = gen_rtx_POST_INC (address_mode, reg0);
4088 has_postinc[mem_mode]
4089 = memory_address_addr_space_p (mem_mode, addr, as);
4091 if (has_postinc[mem_mode])
4092 data->ainc_costs[AINC_POST_INC]
4093 = address_cost (addr, mem_mode, as, speed);
4095 for (i = 0; i < 16; i++)
4097 sym_p = i & 1;
4098 var_p = (i >> 1) & 1;
4099 off_p = (i >> 2) & 1;
4100 rat_p = (i >> 3) & 1;
4102 addr = reg0;
4103 if (rat_p)
4104 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4105 gen_int_mode (rat, address_mode));
4107 if (var_p)
4108 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4110 if (sym_p)
4112 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4113 /* ??? We can run into trouble with some backends by presenting
4114 it with symbols which haven't been properly passed through
4115 targetm.encode_section_info. By setting the local bit, we
4116 enhance the probability of things working. */
4117 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4119 if (off_p)
4120 base = gen_rtx_fmt_e (CONST, address_mode,
4121 gen_rtx_fmt_ee
4122 (PLUS, address_mode, base,
4123 gen_int_mode (off, address_mode)));
4125 else if (off_p)
4126 base = gen_int_mode (off, address_mode);
4127 else
4128 base = NULL_RTX;
4130 if (base)
4131 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4133 start_sequence ();
4134 /* To avoid splitting addressing modes, pretend that no cse will
4135 follow. */
4136 old_cse_not_expected = cse_not_expected;
4137 cse_not_expected = true;
4138 addr = memory_address_addr_space (mem_mode, addr, as);
4139 cse_not_expected = old_cse_not_expected;
4140 seq = get_insns ();
4141 end_sequence ();
4143 acost = seq_cost (seq, speed);
4144 acost += address_cost (addr, mem_mode, as, speed);
4146 if (!acost)
4147 acost = 1;
4148 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4151 /* On some targets, it is quite expensive to load a symbol into a register,
4152 which makes addresses that contain symbols look much more expensive.
4153 However, the symbol will have to be loaded in any case before the
4154 loop (and quite likely we have it in a register already), so it does not
4155 make much sense to penalize them too heavily. Hence make some final
4156 tweaks for the SYMBOL_PRESENT modes:
4158 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4159 var is cheaper, use this mode with small penalty.
4160 If VAR_PRESENT is true, try whether the mode with
4161 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4162 if this is the case, use it. */
4163 add_c = add_cost (speed, address_mode);
4164 for (i = 0; i < 8; i++)
4166 var_p = i & 1;
4167 off_p = (i >> 1) & 1;
4168 rat_p = (i >> 2) & 1;
4170 acost = data->costs[0][1][off_p][rat_p] + 1;
4171 if (var_p)
4172 acost += add_c;
4174 if (acost < data->costs[1][var_p][off_p][rat_p])
4175 data->costs[1][var_p][off_p][rat_p] = acost;
4178 if (dump_file && (dump_flags & TDF_DETAILS))
4180 fprintf (dump_file, "Address costs:\n");
4182 for (i = 0; i < 16; i++)
4184 sym_p = i & 1;
4185 var_p = (i >> 1) & 1;
4186 off_p = (i >> 2) & 1;
4187 rat_p = (i >> 3) & 1;
4189 fprintf (dump_file, " ");
4190 if (sym_p)
4191 fprintf (dump_file, "sym + ");
4192 if (var_p)
4193 fprintf (dump_file, "var + ");
4194 if (off_p)
4195 fprintf (dump_file, "cst + ");
4196 if (rat_p)
4197 fprintf (dump_file, "rat * ");
4199 acost = data->costs[sym_p][var_p][off_p][rat_p];
4200 fprintf (dump_file, "index costs %d\n", acost);
4202 if (has_predec[mem_mode] || has_postdec[mem_mode]
4203 || has_preinc[mem_mode] || has_postinc[mem_mode])
4204 fprintf (dump_file, " May include autoinc/dec\n");
4205 fprintf (dump_file, "\n");
4208 address_cost_data_list[data_index] = data;
4211 bits = GET_MODE_BITSIZE (address_mode);
4212 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4213 offset &= mask;
4214 if ((offset >> (bits - 1) & 1))
4215 offset |= ~mask;
4216 s_offset = offset;
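/* Worked example (illustrative): with a 32-bit address_mode, bits = 32 and
   mask = 0xffffffff; an OFFSET of 0xfffffffc has bit 31 set and is therefore
   sign-extended, so s_offset = -4 -- the signed view used for the range
   check against min_offset/max_offset below.  */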
4218 autoinc = false;
4219 autoinc_type = AINC_NONE;
4220 msize = GET_MODE_SIZE (mem_mode);
4221 autoinc_offset = offset;
4222 if (stmt_after_inc)
4223 autoinc_offset += ratio * cstep;
4224 if (symbol_present || var_present || ratio != 1)
4225 autoinc = false;
4226 else
4228 if (has_postinc[mem_mode] && autoinc_offset == 0
4229 && msize == cstep)
4230 autoinc_type = AINC_POST_INC;
4231 else if (has_postdec[mem_mode] && autoinc_offset == 0
4232 && msize == -cstep)
4233 autoinc_type = AINC_POST_DEC;
4234 else if (has_preinc[mem_mode] && autoinc_offset == msize
4235 && msize == cstep)
4236 autoinc_type = AINC_PRE_INC;
4237 else if (has_predec[mem_mode] && autoinc_offset == -msize
4238 && msize == -cstep)
4239 autoinc_type = AINC_PRE_DEC;
4241 if (autoinc_type != AINC_NONE)
4242 autoinc = true;
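/* Example of the classification above (illustrative): a 4-byte access at
   autoinc_offset 0 from a candidate stepping by +4 matches AINC_POST_INC --
   exactly the access pattern of *p++ on targets with post-increment
   addressing.  */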
4245 cost = 0;
4246 offset_p = (s_offset != 0
4247 && data->min_offset <= s_offset
4248 && s_offset <= data->max_offset);
4249 ratio_p = (ratio != 1
4250 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4252 if (ratio != 1 && !ratio_p)
4253 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4255 if (s_offset && !offset_p && !symbol_present)
4256 cost += add_cost (speed, address_mode);
4258 if (may_autoinc)
4259 *may_autoinc = autoinc;
4260 if (autoinc)
4261 acost = data->ainc_costs[autoinc_type];
4262 else
4263 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4264 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
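/* Illustration (invented shapes): an address with all four parts present,
   e.g. sym + r1 + 4 + 2 * r2, has complexity 4, while one with only the
   variable and ratio parts scores 2. Complexity only breaks ties between
   computations of equal cost.  */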
4265 return new_cost (cost + acost, complexity);
4268 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4269 EXPR operand holding the shift. COST0 and COST1 are the costs for
4270 calculating the operands of EXPR. Returns true if successful, and returns
4271 the cost in COST. */
4273 static bool
4274 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4275 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4277 comp_cost res;
4278 tree op1 = TREE_OPERAND (expr, 1);
4279 tree cst = TREE_OPERAND (mult, 1);
4280 tree multop = TREE_OPERAND (mult, 0);
4281 int m = exact_log2 (int_cst_value (cst));
4282 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4283 int as_cost, sa_cost;
4284 bool mult_in_op1;
4286 if (!(m >= 0 && m < maxm))
4287 return false;
4289 STRIP_NOPS (op1);
4290 mult_in_op1 = operand_equal_p (op1, mult, 0);
4292 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4294 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4295 use that in preference to a shift insn followed by an add insn. */
4296 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4297 ? shiftadd_cost (speed, mode, m)
4298 : (mult_in_op1
4299 ? shiftsub1_cost (speed, mode, m)
4300 : shiftsub0_cost (speed, mode, m)));
4302 res = new_cost (MIN (as_cost, sa_cost), 0);
4303 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4305 STRIP_NOPS (multop);
4306 if (!is_gimple_val (multop))
4307 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4309 *cost = res;
4310 return true;
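/* A hedged sketch of the transformation whose cost is modelled above
   (identifiers invented): for  res = a + b * 8  we have m = 3, i.e.

     tmp = b << 3;   -- shift_cost (speed, mode, 3)
     res = a + tmp;  -- add_cost (speed, mode)

   and on targets with a cheap shift-and-add instruction the pair collapses
   into one insn, which is what sa_cost above captures.  */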
4313 /* Estimates cost of forcing expression EXPR into a variable. */
4315 static comp_cost
4316 force_expr_to_var_cost (tree expr, bool speed)
4318 static bool costs_initialized = false;
4319 static unsigned integer_cost [2];
4320 static unsigned symbol_cost [2];
4321 static unsigned address_cost [2];
4322 tree op0, op1;
4323 comp_cost cost0, cost1, cost;
4324 machine_mode mode;
4326 if (!costs_initialized)
4328 tree type = build_pointer_type (integer_type_node);
4329 tree var, addr;
4330 rtx x;
4331 int i;
4333 var = create_tmp_var_raw (integer_type_node, "test_var");
4334 TREE_STATIC (var) = 1;
4335 x = produce_memory_decl_rtl (var, NULL);
4336 SET_DECL_RTL (var, x);
4338 addr = build1 (ADDR_EXPR, type, var);
4341 for (i = 0; i < 2; i++)
4343 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4344 2000), i);
4346 symbol_cost[i] = computation_cost (addr, i) + 1;
4348 address_cost[i]
4349 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4353 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4354 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4355 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4356 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4357 fprintf (dump_file, "\n");
4361 costs_initialized = true;
4364 STRIP_NOPS (expr);
4366 if (SSA_VAR_P (expr))
4367 return no_cost;
4369 if (is_gimple_min_invariant (expr))
4371 if (TREE_CODE (expr) == INTEGER_CST)
4372 return new_cost (integer_cost [speed], 0);
4374 if (TREE_CODE (expr) == ADDR_EXPR)
4376 tree obj = TREE_OPERAND (expr, 0);
4378 if (TREE_CODE (obj) == VAR_DECL
4379 || TREE_CODE (obj) == PARM_DECL
4380 || TREE_CODE (obj) == RESULT_DECL)
4381 return new_cost (symbol_cost [speed], 0);
4384 return new_cost (address_cost [speed], 0);
4387 switch (TREE_CODE (expr))
4389 case POINTER_PLUS_EXPR:
4390 case PLUS_EXPR:
4391 case MINUS_EXPR:
4392 case MULT_EXPR:
4393 op0 = TREE_OPERAND (expr, 0);
4394 op1 = TREE_OPERAND (expr, 1);
4395 STRIP_NOPS (op0);
4396 STRIP_NOPS (op1);
4397 break;
4399 CASE_CONVERT:
4400 case NEGATE_EXPR:
4401 op0 = TREE_OPERAND (expr, 0);
4402 STRIP_NOPS (op0);
4403 op1 = NULL_TREE;
4404 break;
4406 default:
4407 /* Just an arbitrary value, FIXME. */
4408 return new_cost (target_spill_cost[speed], 0);
4411 if (op0 == NULL_TREE
4412 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4413 cost0 = no_cost;
4414 else
4415 cost0 = force_expr_to_var_cost (op0, speed);
4417 if (op1 == NULL_TREE
4418 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4419 cost1 = no_cost;
4420 else
4421 cost1 = force_expr_to_var_cost (op1, speed);
4423 mode = TYPE_MODE (TREE_TYPE (expr));
4424 switch (TREE_CODE (expr))
4426 case POINTER_PLUS_EXPR:
4427 case PLUS_EXPR:
4428 case MINUS_EXPR:
4429 case NEGATE_EXPR:
4430 cost = new_cost (add_cost (speed, mode), 0);
4431 if (TREE_CODE (expr) != NEGATE_EXPR)
4433 tree mult = NULL_TREE;
4434 comp_cost sa_cost;
4435 if (TREE_CODE (op1) == MULT_EXPR)
4436 mult = op1;
4437 else if (TREE_CODE (op0) == MULT_EXPR)
4438 mult = op0;
4440 if (mult != NULL_TREE
4441 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4442 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4443 speed, &sa_cost))
4444 return sa_cost;
4446 break;
4448 CASE_CONVERT:
4450 tree inner_mode, outer_mode;
4451 outer_mode = TREE_TYPE (expr);
4452 inner_mode = TREE_TYPE (op0);
4453 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4454 TYPE_MODE (inner_mode), speed), 0);
4456 break;
4458 case MULT_EXPR:
4459 if (cst_and_fits_in_hwi (op0))
4460 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4461 mode, speed), 0);
4462 else if (cst_and_fits_in_hwi (op1))
4463 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4464 mode, speed), 0);
4465 else
4466 return new_cost (target_spill_cost [speed], 0);
4467 break;
4469 default:
4470 gcc_unreachable ();
4473 cost = add_costs (cost, cost0);
4474 cost = add_costs (cost, cost1);
4476 /* Bound the cost by target_spill_cost. The parts of complicated
4477 computations often are either loop invariant or at least can
4478 be shared between several iv uses, so letting this grow without
4479 limits would not give reasonable results. */
4480 if (cost.cost > (int) target_spill_cost [speed])
4481 cost.cost = target_spill_cost [speed];
4483 return cost;
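/* Usage illustration (made-up expression): forcing  (a + 4) * 8  charges
   mult_by_coeff_cost for the multiplication plus add_cost for the inner
   PLUS_EXPR, but never more than target_spill_cost, since such a loop
   invariant can always be computed once before the loop and spilled.  */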
4486 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4487 invariants the computation depends on. */
4489 static comp_cost
4490 force_var_cost (struct ivopts_data *data,
4491 tree expr, bitmap *depends_on)
4493 if (depends_on)
4495 fd_ivopts_data = data;
4496 walk_tree (&expr, find_depends, depends_on, NULL);
4499 return force_expr_to_var_cost (expr, data->speed);
4502 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4503 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4504 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4505 invariants the computation depends on. */
4507 static comp_cost
4508 split_address_cost (struct ivopts_data *data,
4509 tree addr, bool *symbol_present, bool *var_present,
4510 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4512 tree core;
4513 HOST_WIDE_INT bitsize;
4514 HOST_WIDE_INT bitpos;
4515 tree toffset;
4516 machine_mode mode;
4517 int unsignedp, reversep, volatilep;
4519 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4520 &unsignedp, &reversep, &volatilep, false);
4522 if (toffset != 0
4523 || bitpos % BITS_PER_UNIT != 0
4524 || reversep
4525 || TREE_CODE (core) != VAR_DECL)
4527 *symbol_present = false;
4528 *var_present = true;
4529 fd_ivopts_data = data;
4530 if (depends_on)
4531 walk_tree (&addr, find_depends, depends_on, NULL);
4533 return new_cost (target_spill_cost[data->speed], 0);
4536 *offset += bitpos / BITS_PER_UNIT;
4537 if (TREE_STATIC (core)
4538 || DECL_EXTERNAL (core))
4540 *symbol_present = true;
4541 *var_present = false;
4542 return no_cost;
4545 *symbol_present = false;
4546 *var_present = true;
4547 return no_cost;
4550 /* Estimates cost of expressing difference of addresses E1 - E2 as
4551 var + symbol + offset. The value of offset is added to OFFSET,
4552 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4553 part is missing. DEPENDS_ON is a set of the invariants the computation
4554 depends on. */
4556 static comp_cost
4557 ptr_difference_cost (struct ivopts_data *data,
4558 tree e1, tree e2, bool *symbol_present, bool *var_present,
4559 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4561 HOST_WIDE_INT diff = 0;
4562 aff_tree aff_e1, aff_e2;
4563 tree type;
4565 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4567 if (ptr_difference_const (e1, e2, &diff))
4569 *offset += diff;
4570 *symbol_present = false;
4571 *var_present = false;
4572 return no_cost;
4575 if (integer_zerop (e2))
4576 return split_address_cost (data, TREE_OPERAND (e1, 0),
4577 symbol_present, var_present, offset, depends_on);
4579 *symbol_present = false;
4580 *var_present = true;
4582 type = signed_type_for (TREE_TYPE (e1));
4583 tree_to_aff_combination (e1, type, &aff_e1);
4584 tree_to_aff_combination (e2, type, &aff_e2);
4585 aff_combination_scale (&aff_e2, -1);
4586 aff_combination_add (&aff_e1, &aff_e2);
4588 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
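/* E.g. (illustrative) for e1 = &a[4] and e2 = &a[0] with 4-byte elements,
   ptr_difference_const succeeds with diff = 16, so the whole difference is
   folded into *offset at no runtime cost.  */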
4591 /* Estimates cost of expressing difference E1 - E2 as
4592 var + symbol + offset. The value of offset is added to OFFSET,
4593 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4594 part is missing. DEPENDS_ON is a set of the invariants the computation
4595 depends on. */
4597 static comp_cost
4598 difference_cost (struct ivopts_data *data,
4599 tree e1, tree e2, bool *symbol_present, bool *var_present,
4600 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4602 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4603 unsigned HOST_WIDE_INT off1, off2;
4604 aff_tree aff_e1, aff_e2;
4605 tree type;
4607 e1 = strip_offset (e1, &off1);
4608 e2 = strip_offset (e2, &off2);
4609 *offset += off1 - off2;
4611 STRIP_NOPS (e1);
4612 STRIP_NOPS (e2);
4614 if (TREE_CODE (e1) == ADDR_EXPR)
4615 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4616 offset, depends_on);
4617 *symbol_present = false;
4619 if (operand_equal_p (e1, e2, 0))
4621 *var_present = false;
4622 return no_cost;
4625 *var_present = true;
4627 if (integer_zerop (e2))
4628 return force_var_cost (data, e1, depends_on);
4630 if (integer_zerop (e1))
4632 comp_cost cost = force_var_cost (data, e2, depends_on);
4633 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4634 return cost;
4637 type = signed_type_for (TREE_TYPE (e1));
4638 tree_to_aff_combination (e1, type, &aff_e1);
4639 tree_to_aff_combination (e2, type, &aff_e2);
4640 aff_combination_scale (&aff_e2, -1);
4641 aff_combination_add (&aff_e1, &aff_e2);
4643 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4646 /* Returns true if AFF1 and AFF2 are identical. */
4648 static bool
4649 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4651 unsigned i;
4653 if (aff1->n != aff2->n)
4654 return false;
4656 for (i = 0; i < aff1->n; i++)
4658 if (aff1->elts[i].coef != aff2->elts[i].coef)
4659 return false;
4661 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4662 return false;
4664 return true;
4667 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4669 static int
4670 get_expr_id (struct ivopts_data *data, tree expr)
4672 struct iv_inv_expr_ent ent;
4673 struct iv_inv_expr_ent **slot;
4675 ent.expr = expr;
4676 ent.hash = iterative_hash_expr (expr, 0);
4677 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4678 if (*slot)
4679 return (*slot)->id;
4681 *slot = XNEW (struct iv_inv_expr_ent);
4682 (*slot)->expr = expr;
4683 (*slot)->hash = ent.hash;
4684 (*slot)->id = data->inv_expr_id++;
4685 return (*slot)->id;
4688 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4689 requires a new compiler-generated temporary. Returns -1 otherwise.
4690 ADDRESS_P is a flag indicating if the expression is for address
4691 computation. */
4693 static int
4694 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4695 tree cbase, HOST_WIDE_INT ratio,
4696 bool address_p)
4698 aff_tree ubase_aff, cbase_aff;
4699 tree expr, ub, cb;
4701 STRIP_NOPS (ubase);
4702 STRIP_NOPS (cbase);
4703 ub = ubase;
4704 cb = cbase;
4706 if ((TREE_CODE (ubase) == INTEGER_CST)
4707 && (TREE_CODE (cbase) == INTEGER_CST))
4708 return -1;
4710 /* Strips the constant part. */
4711 if (TREE_CODE (ubase) == PLUS_EXPR
4712 || TREE_CODE (ubase) == MINUS_EXPR
4713 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4715 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4716 ubase = TREE_OPERAND (ubase, 0);
4719 /* Strips the constant part. */
4720 if (TREE_CODE (cbase) == PLUS_EXPR
4721 || TREE_CODE (cbase) == MINUS_EXPR
4722 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4724 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4725 cbase = TREE_OPERAND (cbase, 0);
4728 if (address_p)
4730 if (((TREE_CODE (ubase) == SSA_NAME)
4731 || (TREE_CODE (ubase) == ADDR_EXPR
4732 && is_gimple_min_invariant (ubase)))
4733 && (TREE_CODE (cbase) == INTEGER_CST))
4734 return -1;
4736 if (((TREE_CODE (cbase) == SSA_NAME)
4737 || (TREE_CODE (cbase) == ADDR_EXPR
4738 && is_gimple_min_invariant (cbase)))
4739 && (TREE_CODE (ubase) == INTEGER_CST))
4740 return -1;
4743 if (ratio == 1)
4745 if (operand_equal_p (ubase, cbase, 0))
4746 return -1;
4748 if (TREE_CODE (ubase) == ADDR_EXPR
4749 && TREE_CODE (cbase) == ADDR_EXPR)
4751 tree usym, csym;
4753 usym = TREE_OPERAND (ubase, 0);
4754 csym = TREE_OPERAND (cbase, 0);
4755 if (TREE_CODE (usym) == ARRAY_REF)
4757 tree ind = TREE_OPERAND (usym, 1);
4758 if (TREE_CODE (ind) == INTEGER_CST
4759 && tree_fits_shwi_p (ind)
4760 && tree_to_shwi (ind) == 0)
4761 usym = TREE_OPERAND (usym, 0);
4763 if (TREE_CODE (csym) == ARRAY_REF)
4765 tree ind = TREE_OPERAND (csym, 1);
4766 if (TREE_CODE (ind) == INTEGER_CST
4767 && tree_fits_shwi_p (ind)
4768 && tree_to_shwi (ind) == 0)
4769 csym = TREE_OPERAND (csym, 0);
4771 if (operand_equal_p (usym, csym, 0))
4772 return -1;
4774 /* Now do a more complex comparison. */
4775 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4776 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4777 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4778 return -1;
4781 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4782 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4784 aff_combination_scale (&cbase_aff, -1 * ratio);
4785 aff_combination_add (&ubase_aff, &cbase_aff);
4786 expr = aff_combination_to_tree (&ubase_aff);
4787 return get_expr_id (data, expr);
4792 /* Determines the cost of the computation by which USE is expressed
4793 from induction variable CAND. If ADDRESS_P is true, we just need
4794 to create an address from it, otherwise we want to get it into a
4795 register. A set of invariants we depend on is stored in
4796 DEPENDS_ON. AT is the statement at which the value is computed.
4797 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4798 addressing is likely. */
4800 static comp_cost
4801 get_computation_cost_at (struct ivopts_data *data,
4802 struct iv_use *use, struct iv_cand *cand,
4803 bool address_p, bitmap *depends_on, gimple *at,
4804 bool *can_autoinc,
4805 int *inv_expr_id)
4807 tree ubase = use->iv->base, ustep = use->iv->step;
4808 tree cbase, cstep;
4809 tree utype = TREE_TYPE (ubase), ctype;
4810 unsigned HOST_WIDE_INT cstepi, offset = 0;
4811 HOST_WIDE_INT ratio, aratio;
4812 bool var_present, symbol_present, stmt_is_after_inc;
4813 comp_cost cost;
4814 widest_int rat;
4815 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4816 machine_mode mem_mode = (address_p
4817 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4818 : VOIDmode);
4820 if (depends_on)
4821 *depends_on = NULL;
4823 /* Only consider real candidates. */
4824 if (!cand->iv)
4825 return infinite_cost;
4827 cbase = cand->iv->base;
4828 cstep = cand->iv->step;
4829 ctype = TREE_TYPE (cbase);
4831 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4833 /* We do not have enough precision to express the values of the use. */
4834 return infinite_cost;
4837 if (address_p
4838 || (use->iv->base_object
4839 && cand->iv->base_object
4840 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4841 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4843 /* Do not try to express the address of an object with a computation
4844 based on the address of a different object. This may cause problems
4845 in RTL-level alias analysis (which does not expect this to happen,
4846 as it is illegal in C), and would be unlikely to be useful
4847 anyway. */
4848 if (use->iv->base_object
4849 && cand->iv->base_object
4850 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4851 return infinite_cost;
4854 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4856 /* TODO -- add direct handling of this case. */
4857 goto fallback;
4860 /* CSTEPI is removed from the offset in case the statement is after the
4861 increment. If the step is not constant, we use zero instead.
4862 This is a bit imprecise (there is the extra addition), but
4863 redundancy elimination is likely to transform the code so that
4864 it uses the value of the variable before the increment anyway,
4865 so it is not that unrealistic. */
4866 if (cst_and_fits_in_hwi (cstep))
4867 cstepi = int_cst_value (cstep);
4868 else
4869 cstepi = 0;
4871 if (!constant_multiple_of (ustep, cstep, &rat))
4872 return infinite_cost;
4874 if (wi::fits_shwi_p (rat))
4875 ratio = rat.to_shwi ();
4876 else
4877 return infinite_cost;
4879 STRIP_NOPS (cbase);
4880 ctype = TREE_TYPE (cbase);
4882 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4884 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4885 or ratio == 1, it is better to handle this like
4887 ubase - ratio * cbase + ratio * var
4889 (this also holds in the case ratio == -1, TODO). */
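/* Small illustration (values invented): with ustep = 4 and cstep = 2,
   constant_multiple_of yields ratio = 2, so a use with ubase = 16 over a
   candidate with cbase = 0 is expressed as  16 + 2 * var.  */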
4891 if (cst_and_fits_in_hwi (cbase))
4893 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4894 cost = difference_cost (data,
4895 ubase, build_int_cst (utype, 0),
4896 &symbol_present, &var_present, &offset,
4897 depends_on);
4898 cost.cost /= avg_loop_niter (data->current_loop);
4900 else if (ratio == 1)
4902 tree real_cbase = cbase;
4904 /* Check to see if any adjustment is needed. */
4905 if (cstepi == 0 && stmt_is_after_inc)
4907 aff_tree real_cbase_aff;
4908 aff_tree cstep_aff;
4910 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4911 &real_cbase_aff);
4912 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4914 aff_combination_add (&real_cbase_aff, &cstep_aff);
4915 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4918 cost = difference_cost (data,
4919 ubase, real_cbase,
4920 &symbol_present, &var_present, &offset,
4921 depends_on);
4922 cost.cost /= avg_loop_niter (data->current_loop);
4924 else if (address_p
4925 && !POINTER_TYPE_P (ctype)
4926 && multiplier_allowed_in_address_p
4927 (ratio, mem_mode,
4928 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4930 if (cstepi == 0 && stmt_is_after_inc)
4932 if (POINTER_TYPE_P (ctype))
4933 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4934 else
4935 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4937 cbase
4938 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4939 cost = difference_cost (data,
4940 ubase, cbase,
4941 &symbol_present, &var_present, &offset,
4942 depends_on);
4943 cost.cost /= avg_loop_niter (data->current_loop);
4945 else
4947 cost = force_var_cost (data, cbase, depends_on);
4948 cost = add_costs (cost,
4949 difference_cost (data,
4950 ubase, build_int_cst (utype, 0),
4951 &symbol_present, &var_present,
4952 &offset, depends_on));
4953 cost.cost /= avg_loop_niter (data->current_loop);
4954 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4957 /* Record the setup cost in the scratch field. */
4958 cost.scratch = cost.cost;
4959 /* Set of invariants depended on by sub use has already been computed
4960 for the first use in the group. */
4961 if (use->sub_id)
4963 cost.cost = 0;
4964 if (depends_on && *depends_on)
4965 bitmap_clear (*depends_on);
4967 else if (inv_expr_id)
4969 *inv_expr_id =
4970 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4971 /* Clear depends on. */
4972 if (*inv_expr_id != -1 && depends_on && *depends_on)
4973 bitmap_clear (*depends_on);
4976 /* If we are after the increment, the value of the candidate is higher by
4977 one iteration. */
4978 if (stmt_is_after_inc)
4979 offset -= ratio * cstepi;
4981 /* Now the computation is of the shape symbol + var1 + const + ratio * var2.
4982 (The symbol/var1/const parts may be omitted.) If we are looking for an
4983 address, find the cost of addressing this. */
4984 if (address_p)
4985 return add_costs (cost,
4986 get_address_cost (symbol_present, var_present,
4987 offset, ratio, cstepi,
4988 mem_mode,
4989 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4990 speed, stmt_is_after_inc,
4991 can_autoinc));
4993 /* Otherwise estimate the costs for computing the expression. */
4994 if (!symbol_present && !var_present && !offset)
4996 if (ratio != 1)
4997 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4998 return cost;
5001 /* Symbol + offset should be compile-time computable so consider that they
5002 are added once to the variable, if present. */
5003 if (var_present && (symbol_present || offset))
5004 cost.cost += adjust_setup_cost (data,
5005 add_cost (speed, TYPE_MODE (ctype)));
5007 /* Having an offset does not affect the runtime cost if it is added to the
5008 symbol, but it increases complexity. */
5009 if (offset)
5010 cost.complexity++;
5012 cost.cost += add_cost (speed, TYPE_MODE (ctype));
5014 aratio = ratio > 0 ? ratio : -ratio;
5015 if (aratio != 1)
5016 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5017 return cost;
5019 fallback:
5020 if (can_autoinc)
5021 *can_autoinc = false;
5024 /* Just get the expression, expand it and measure the cost. */
5025 tree comp = get_computation_at (data->current_loop, use, cand, at);
5027 if (!comp)
5028 return infinite_cost;
5030 if (address_p)
5031 comp = build_simple_mem_ref (comp);
5033 cost = new_cost (computation_cost (comp, speed), 0);
5034 cost.scratch = 0;
5035 return cost;
5039 /* Determines the cost of the computation by which USE is expressed
5040 from induction variable CAND. If ADDRESS_P is true, we just need
5041 to create an address from it, otherwise we want to get it into a
5042 register. A set of invariants we depend on is stored in
5043 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5044 autoinc addressing is likely. */
5046 static comp_cost
5047 get_computation_cost (struct ivopts_data *data,
5048 struct iv_use *use, struct iv_cand *cand,
5049 bool address_p, bitmap *depends_on,
5050 bool *can_autoinc, int *inv_expr_id)
5052 return get_computation_cost_at (data,
5053 use, cand, address_p, depends_on, use->stmt,
5054 can_autoinc, inv_expr_id);
5057 /* Determines cost of basing replacement of USE on CAND in a generic
5058 expression. */
5060 static bool
5061 determine_use_iv_cost_generic (struct ivopts_data *data,
5062 struct iv_use *use, struct iv_cand *cand)
5064 bitmap depends_on;
5065 comp_cost cost;
5066 int inv_expr_id = -1;
5068 /* The simple case first -- if we need to express value of the preserved
5069 original biv, the cost is 0. This also prevents us from counting the
5070 cost of increment twice -- once at this use and once in the cost of
5071 the candidate. */
5072 if (cand->pos == IP_ORIGINAL
5073 && cand->incremented_at == use->stmt)
5075 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5076 ERROR_MARK, -1);
5077 return true;
5080 cost = get_computation_cost (data, use, cand, false, &depends_on,
5081 NULL, &inv_expr_id);
5083 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5084 inv_expr_id);
5086 return !infinite_cost_p (cost);
5089 /* Determines cost of basing replacement of USE on CAND in an address. */
5091 static bool
5092 determine_use_iv_cost_address (struct ivopts_data *data,
5093 struct iv_use *use, struct iv_cand *cand)
5095 bitmap depends_on;
5096 bool can_autoinc, first;
5097 int inv_expr_id = -1;
5098 struct iv_use *sub_use;
5099 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5100 &can_autoinc, &inv_expr_id);
5101 comp_cost sub_cost = cost;
5103 if (cand->ainc_use == use)
5105 if (can_autoinc)
5106 cost.cost -= cand->cost_step;
5107 /* If we generated the candidate solely for exploiting autoincrement
5108 opportunities, and it turns out it can't be used, set the cost to
5109 infinity to make sure we ignore it. */
5110 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5111 cost = infinite_cost;
5114 if (!infinite_cost_p (cost) && use->next)
5116 first = true;
5117 sub_use = use->next;
5118 /* We don't want to add setup cost for sub-uses. */
5119 sub_cost.cost -= sub_cost.scratch;
5120 /* Add cost for sub uses in group. */
5123 /* Compute cost for the first sub use with different offset to
5124 the main use and add it afterwards. Costs for these uses
5125 could be quite different. Given below uses in a group:
5126 use 0 : {base + A + offset_0, step}
5127 use 0.1: {base + A + offset_0, step}
5128 use 0.2: {base + A + offset_1, step}
5129 use 0.3: {base + A + offset_2, step}
5130 when we need to compute costs with candidate:
5131 cand 1 : {base + B + offset_0, step}
5133 The first sub use with different offset is use 0.2, its cost
5134 is larger than cost of use 0/0.1 because we need to compute:
5135 A - B + offset_1 - offset_0
5136 rather than:
5137 A - B. */
5138 if (first && use->addr_offset != sub_use->addr_offset)
5140 first = false;
5141 sub_cost = get_computation_cost (data, sub_use, cand, true,
5142 NULL, &can_autoinc, NULL);
5143 if (infinite_cost_p (sub_cost))
5145 cost = infinite_cost;
5146 break;
5150 cost = add_costs (cost, sub_cost);
5151 sub_use = sub_use->next;
5153 while (sub_use);
5156 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5157 inv_expr_id);
5159 return !infinite_cost_p (cost);
5162 /* Computes value of candidate CAND at position AT in iteration NITER, and
5163 stores it to VAL. */
5165 static void
5166 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5167 aff_tree *val)
5169 aff_tree step, delta, nit;
5170 struct iv *iv = cand->iv;
5171 tree type = TREE_TYPE (iv->base);
5172 tree steptype = type;
5173 if (POINTER_TYPE_P (type))
5174 steptype = sizetype;
5175 steptype = unsigned_type_for (type);
5177 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5178 aff_combination_convert (&step, steptype);
5179 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5180 aff_combination_convert (&nit, steptype);
5181 aff_combination_mult (&nit, &step, &delta);
5182 if (stmt_after_increment (loop, cand, at))
5183 aff_combination_add (&delta, &step);
5185 tree_to_aff_combination (iv->base, type, val);
5186 if (!POINTER_TYPE_P (type))
5187 aff_combination_convert (val, steptype);
5188 aff_combination_add (val, &delta);
5191 /* Returns period of induction variable iv. */
5193 static tree
5194 iv_period (struct iv *iv)
5196 tree step = iv->step, period, type;
5197 tree pow2div;
5199 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5201 type = unsigned_type_for (TREE_TYPE (step));
5202 /* The period of the iv is lcm (step, type_range)/step - 1,
5203 i.e., N*type_range/step - 1. Since the type range is a power
5204 of two, N == step >> num_of_ending_zeros_binary (step),
5205 so the final result is
5207 (type_range >> num_of_ending_zeros_binary (step)) - 1
5210 pow2div = num_ending_zeros (step);
5212 period = build_low_bits_mask (type,
5213 (TYPE_PRECISION (type)
5214 - tree_to_uhwi (pow2div)));
5216 return period;
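/* Sanity-check example (not from the sources): for a 32-bit unsigned iv
   with step 4, num_ending_zeros is 2 and the period is (1 << 30) - 1, i.e.
   the iv steps through 2^30 distinct values before wrapping to its start.  */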
5219 /* Returns the comparison operator used when eliminating the iv USE. */
5221 static enum tree_code
5222 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5224 struct loop *loop = data->current_loop;
5225 basic_block ex_bb;
5226 edge exit;
5228 ex_bb = gimple_bb (use->stmt);
5229 exit = EDGE_SUCC (ex_bb, 0);
5230 if (flow_bb_inside_loop_p (loop, exit->dest))
5231 exit = EDGE_SUCC (ex_bb, 1);
5233 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5236 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5237 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5238 calculation is performed in non-wrapping type.
5240 TODO: More generally, we could test for the situation that
5241 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5242 This would require knowing the sign of OFFSET. */
5244 static bool
5245 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5247 enum tree_code code;
5248 tree e1, e2;
5249 aff_tree aff_e1, aff_e2, aff_offset;
5251 if (!nowrap_type_p (TREE_TYPE (base)))
5252 return false;
5254 base = expand_simple_operations (base);
5256 if (TREE_CODE (base) == SSA_NAME)
5258 gimple *stmt = SSA_NAME_DEF_STMT (base);
5260 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5261 return false;
5263 code = gimple_assign_rhs_code (stmt);
5264 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5265 return false;
5267 e1 = gimple_assign_rhs1 (stmt);
5268 e2 = gimple_assign_rhs2 (stmt);
5270 else
5272 code = TREE_CODE (base);
5273 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5274 return false;
5275 e1 = TREE_OPERAND (base, 0);
5276 e2 = TREE_OPERAND (base, 1);
5279 /* Use affine expansion as deeper inspection to prove the equality. */
5280 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5281 &aff_e2, &data->name_expansion_cache);
5282 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5283 &aff_offset, &data->name_expansion_cache);
5284 aff_combination_scale (&aff_offset, -1);
5285 switch (code)
5287 case PLUS_EXPR:
5288 aff_combination_add (&aff_e2, &aff_offset);
5289 if (aff_combination_zero_p (&aff_e2))
5290 return true;
5292 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5293 &aff_e1, &data->name_expansion_cache);
5294 aff_combination_add (&aff_e1, &aff_offset);
5295 return aff_combination_zero_p (&aff_e1);
5297 case POINTER_PLUS_EXPR:
5298 aff_combination_add (&aff_e2, &aff_offset);
5299 return aff_combination_zero_p (&aff_e2);
5301 default:
5302 return false;
5306 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5307 comparison with CAND. NITER describes the number of iterations of
5308 the loops. If successful, the comparison in COMP_P is altered accordingly.
5310 We aim to handle the following situation:
5312 sometype *base, *p;
5313 int a, b, i;
5315 i = a;
5316 p = p_0 = base + a;
5320 bla (*p);
5321 p++;
5322 i++;
5324 while (i < b);
5326 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5327 We aim to optimize this to
5329 p = p_0 = base + a;
5332 bla (*p);
5333 p++;
5335 while (p < p_0 - a + b);
5337 This preserves the correctness, since the pointer arithmetics does not
5338 overflow. More precisely:
5340 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5341 overflow in computing it or the values of p.
5342 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5343 overflow. To prove this, we use the fact that p_0 = base + a. */
5345 static bool
5346 iv_elimination_compare_lt (struct ivopts_data *data,
5347 struct iv_cand *cand, enum tree_code *comp_p,
5348 struct tree_niter_desc *niter)
5350 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5351 struct aff_tree nit, tmpa, tmpb;
5352 enum tree_code comp;
5353 HOST_WIDE_INT step;
5355 /* We need to know that the candidate induction variable does not overflow.
5356 While more complex analysis may be used to prove this, for now just
5357 check that the variable appears in the original program and that it
5358 is computed in a type that guarantees no overflows. */
5359 cand_type = TREE_TYPE (cand->iv->base);
5360 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5361 return false;
5363 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5364 the calculation of the BOUND could overflow, making the comparison
5365 invalid. */
5366 if (!data->loop_single_exit_p)
5367 return false;
5369 /* We need to be able to decide whether candidate is increasing or decreasing
5370 in order to choose the right comparison operator. */
5371 if (!cst_and_fits_in_hwi (cand->iv->step))
5372 return false;
5373 step = int_cst_value (cand->iv->step);
5375 /* Check that the number of iterations matches the expected pattern:
5376 a + 1 > b ? 0 : b - a - 1. */
5377 mbz = niter->may_be_zero;
5378 if (TREE_CODE (mbz) == GT_EXPR)
5380 /* Handle a + 1 > b. */
5381 tree op0 = TREE_OPERAND (mbz, 0);
5382 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5384 a = TREE_OPERAND (op0, 0);
5385 b = TREE_OPERAND (mbz, 1);
5387 else
5388 return false;
5390 else if (TREE_CODE (mbz) == LT_EXPR)
5392 tree op1 = TREE_OPERAND (mbz, 1);
5394 /* Handle b < a + 1. */
5395 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5397 a = TREE_OPERAND (op1, 0);
5398 b = TREE_OPERAND (mbz, 0);
5400 else
5401 return false;
5403 else
5404 return false;
5406 /* Expected number of iterations is B - A - 1. Check that it matches
5407 the actual number, i.e., that B - A - NITER = 1. */
5408 tree_to_aff_combination (niter->niter, nit_type, &nit);
5409 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5410 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5411 aff_combination_scale (&nit, -1);
5412 aff_combination_scale (&tmpa, -1);
5413 aff_combination_add (&tmpb, &tmpa);
5414 aff_combination_add (&tmpb, &nit);
5415 if (tmpb.n != 0 || tmpb.offset != 1)
5416 return false;
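/* E.g. (illustrative) with a = 2 and b = 7 the loop runs b - a - 1 = 4
   times; substituting niter = 4 gives B - A - NITER = 1, so the affine
   check above accepts the pattern.  */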
5418 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5419 overflow. */
5420 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5421 cand->iv->step,
5422 fold_convert (TREE_TYPE (cand->iv->step), a));
5423 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5424 return false;
5426 /* Determine the new comparison operator. */
5427 comp = step < 0 ? GT_EXPR : LT_EXPR;
5428 if (*comp_p == NE_EXPR)
5429 *comp_p = comp;
5430 else if (*comp_p == EQ_EXPR)
5431 *comp_p = invert_tree_comparison (comp, false);
5432 else
5433 gcc_unreachable ();
5435 return true;
5438 /* Check whether it is possible to express the condition in USE by comparison
5439 of candidate CAND. If so, store the value compared with to BOUND, and the
5440 comparison operator to COMP. */
5442 static bool
5443 may_eliminate_iv (struct ivopts_data *data,
5444 struct iv_use *use, struct iv_cand *cand, tree *bound,
5445 enum tree_code *comp)
5447 basic_block ex_bb;
5448 edge exit;
5449 tree period;
5450 struct loop *loop = data->current_loop;
5451 aff_tree bnd;
5452 struct tree_niter_desc *desc = NULL;
5454 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5455 return false;
5457 /* For now this works only for exits that dominate the loop latch.
5458 TODO: extend to other conditions inside the loop body. */
5459 ex_bb = gimple_bb (use->stmt);
5460 if (use->stmt != last_stmt (ex_bb)
5461 || gimple_code (use->stmt) != GIMPLE_COND
5462 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5463 return false;
5465 exit = EDGE_SUCC (ex_bb, 0);
5466 if (flow_bb_inside_loop_p (loop, exit->dest))
5467 exit = EDGE_SUCC (ex_bb, 1);
5468 if (flow_bb_inside_loop_p (loop, exit->dest))
5469 return false;
5471 desc = niter_for_exit (data, exit);
5472 if (!desc)
5473 return false;
5475 /* Determine whether we can use the variable to test the exit condition.
5476 This is the case iff the period of the induction variable is greater
5477 than the number of iterations for which the exit condition is true. */
5478 period = iv_period (cand->iv);
5480 /* If the number of iterations is constant, compare against it directly. */
5481 if (TREE_CODE (desc->niter) == INTEGER_CST)
5483 /* See cand_value_at. */
5484 if (stmt_after_increment (loop, cand, use->stmt))
5486 if (!tree_int_cst_lt (desc->niter, period))
5487 return false;
5489 else
5491 if (tree_int_cst_lt (period, desc->niter))
5492 return false;
5496 /* If not, and if this is the only possible exit of the loop, see whether
5497 we can get a conservative estimate on the number of iterations of the
5498 entire loop and compare against that instead. */
5499 else
5501 widest_int period_value, max_niter;
5503 max_niter = desc->max;
5504 if (stmt_after_increment (loop, cand, use->stmt))
5505 max_niter += 1;
5506 period_value = wi::to_widest (period);
5507 if (wi::gtu_p (max_niter, period_value))
5509 /* See if we can take advantage of inferred loop bound information. */
5510 if (data->loop_single_exit_p)
5512 if (!max_loop_iterations (loop, &max_niter))
5513 return false;
5514 /* The loop bound is already adjusted by adding 1. */
5515 if (wi::gtu_p (max_niter, period_value))
5516 return false;
5518 else
5519 return false;
5523 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5525 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5526 aff_combination_to_tree (&bnd));
5527 *comp = iv_elimination_compare (data, use);
5529 /* It is unlikely that computing the number of iterations using division
5530 would be more profitable than keeping the original induction variable. */
5531 if (expression_expensive_p (*bound))
5532 return false;
5534 /* Sometimes, it is possible to handle the situation that the number of
5535 iterations may be zero unless additional assumptions hold, by using <
5536 instead of != in the exit condition.
5538 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5539 base the exit condition on it. However, that is often too
5540 expensive. */
5541 if (!integer_zerop (desc->may_be_zero))
5542 return iv_elimination_compare_lt (data, cand, comp, desc);
5544 return true;
5547 /* Calculates the cost of BOUND if it is a PARM_DECL. A PARM_DECL must
5548 be copied if it is used in the loop body and DATA->body_includes_call. */
5550 static int
5551 parm_decl_cost (struct ivopts_data *data, tree bound)
5553 tree sbound = bound;
5554 STRIP_NOPS (sbound);
5556 if (TREE_CODE (sbound) == SSA_NAME
5557 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5558 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5559 && data->body_includes_call)
5560 return COSTS_N_INSNS (1);
5562 return 0;
5565 /* Determines cost of basing replacement of USE on CAND in a condition. */
5567 static bool
5568 determine_use_iv_cost_condition (struct ivopts_data *data,
5569 struct iv_use *use, struct iv_cand *cand)
5571 tree bound = NULL_TREE;
5572 struct iv *cmp_iv;
5573 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5574 comp_cost elim_cost, express_cost, cost, bound_cost;
5575 bool ok;
5576 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5577 tree *control_var, *bound_cst;
5578 enum tree_code comp = ERROR_MARK;
5580 /* Only consider real candidates. */
5581 if (!cand->iv)
5583 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5584 ERROR_MARK, -1);
5585 return false;
5588 /* Try iv elimination. */
5589 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5591 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5592 if (elim_cost.cost == 0)
5593 elim_cost.cost = parm_decl_cost (data, bound);
5594 else if (TREE_CODE (bound) == INTEGER_CST)
5595 elim_cost.cost = 0;
5596 /* If we replace a loop condition 'i < n' with 'p < base + n',
5597 depends_on_elim will have 'base' and 'n' set, which implies
5598 that both 'base' and 'n' will be live during the loop. More likely,
5599 'base + n' will be loop invariant, resulting in only one live value
5600 during the loop. So in that case we clear depends_on_elim and set
5601 elim_inv_expr_id instead. */
5602 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5604 elim_inv_expr_id = get_expr_id (data, bound);
5605 bitmap_clear (depends_on_elim);
5607 /* The bound is a loop invariant, so it will be only computed
5608 once. */
5609 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5611 else
5612 elim_cost = infinite_cost;
5614 /* Try expressing the original giv. If it is compared with an invariant,
5615 note that we cannot get rid of it. */
5616 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5617 NULL, &cmp_iv);
5618 gcc_assert (ok);
5620 /* When the condition is a comparison of the candidate IV against
5621 zero, prefer this IV.
5623 TODO: The constant that we're subtracting from the cost should
5624 be target-dependent. This information should be added to the
5625 target costs for each backend. */
5626 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5627 && integer_zerop (*bound_cst)
5628 && (operand_equal_p (*control_var, cand->var_after, 0)
5629 || operand_equal_p (*control_var, cand->var_before, 0)))
5630 elim_cost.cost -= 1;
5632 express_cost = get_computation_cost (data, use, cand, false,
5633 &depends_on_express, NULL,
5634 &express_inv_expr_id);
5635 fd_ivopts_data = data;
5636 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5638 /* Count the cost of the original bound as well. */
5639 bound_cost = force_var_cost (data, *bound_cst, NULL);
5640 if (bound_cost.cost == 0)
5641 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5642 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5643 bound_cost.cost = 0;
5644 express_cost.cost += bound_cost.cost;
5646 /* Choose the better approach, preferring the eliminated IV. */
5647 if (compare_costs (elim_cost, express_cost) <= 0)
5649 cost = elim_cost;
5650 depends_on = depends_on_elim;
5651 depends_on_elim = NULL;
5652 inv_expr_id = elim_inv_expr_id;
5654 else
5656 cost = express_cost;
5657 depends_on = depends_on_express;
5658 depends_on_express = NULL;
5659 bound = NULL_TREE;
5660 comp = ERROR_MARK;
5661 inv_expr_id = express_inv_expr_id;
5664 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5666 if (depends_on_elim)
5667 BITMAP_FREE (depends_on_elim);
5668 if (depends_on_express)
5669 BITMAP_FREE (depends_on_express);
5671 return !infinite_cost_p (cost);
5674 /* Determines cost of basing replacement of USE on CAND. Returns false
5675 if USE cannot be based on CAND. */
5677 static bool
5678 determine_use_iv_cost (struct ivopts_data *data,
5679 struct iv_use *use, struct iv_cand *cand)
5681 switch (use->type)
5683 case USE_NONLINEAR_EXPR:
5684 return determine_use_iv_cost_generic (data, use, cand);
5686 case USE_ADDRESS:
5687 return determine_use_iv_cost_address (data, use, cand);
5689 case USE_COMPARE:
5690 return determine_use_iv_cost_condition (data, use, cand);
5692 default:
5693 gcc_unreachable ();
5697 /* Return true if get_computation_cost indicates that autoincrement is
5698 a possibility for the pair of USE and CAND, false otherwise. */
5700 static bool
5701 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5702 struct iv_cand *cand)
5704 bitmap depends_on;
5705 bool can_autoinc;
5706 comp_cost cost;
5708 if (use->type != USE_ADDRESS)
5709 return false;
5711 cost = get_computation_cost (data, use, cand, true, &depends_on,
5712 &can_autoinc, NULL);
5714 BITMAP_FREE (depends_on);
5716 return !infinite_cost_p (cost) && can_autoinc;
5719 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5720 use that allows autoincrement, and set their AINC_USE if possible. */
5722 static void
5723 set_autoinc_for_original_candidates (struct ivopts_data *data)
5725 unsigned i, j;
5727 for (i = 0; i < n_iv_cands (data); i++)
5729 struct iv_cand *cand = iv_cand (data, i);
5730 struct iv_use *closest_before = NULL;
5731 struct iv_use *closest_after = NULL;
5732 if (cand->pos != IP_ORIGINAL)
5733 continue;
5735 for (j = 0; j < n_iv_uses (data); j++)
5737 struct iv_use *use = iv_use (data, j);
5738 unsigned uid = gimple_uid (use->stmt);
5740 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5741 continue;
5743 if (uid < gimple_uid (cand->incremented_at)
5744 && (closest_before == NULL
5745 || uid > gimple_uid (closest_before->stmt)))
5746 closest_before = use;
5748 if (uid > gimple_uid (cand->incremented_at)
5749 && (closest_after == NULL
5750 || uid < gimple_uid (closest_after->stmt)))
5751 closest_after = use;
5754 if (closest_before != NULL
5755 && autoinc_possible_for_pair (data, closest_before, cand))
5756 cand->ainc_use = closest_before;
5757 else if (closest_after != NULL
5758 && autoinc_possible_for_pair (data, closest_after, cand))
5759 cand->ainc_use = closest_after;
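/* E.g. (illustrative) for  ... = *p;  p = p + 4;  the load is the use
   closest before the increment, so it becomes the candidate's AINC_USE
   and may later be rewritten as a post-increment access.  */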
5763 /* Finds the candidates for the induction variables. */
5765 static void
5766 find_iv_candidates (struct ivopts_data *data)
5768 /* Add commonly used ivs. */
5769 add_standard_iv_candidates (data);
5771 /* Add old induction variables. */
5772 add_iv_candidate_for_bivs (data);
5774 /* Add induction variables derived from uses. */
5775 add_iv_candidate_for_uses (data);
5777 set_autoinc_for_original_candidates (data);
5779 /* Record the important candidates. */
5780 record_important_candidates (data);
5783 /* Determines costs of basing the use of the iv on an iv candidate. */
5785 static void
5786 determine_use_iv_costs (struct ivopts_data *data)
5788 unsigned i, j;
5789 struct iv_use *use;
5790 struct iv_cand *cand;
5791 bitmap to_clear = BITMAP_ALLOC (NULL);
5793 alloc_use_cost_map (data);
5795 for (i = 0; i < n_iv_uses (data); i++)
5797 use = iv_use (data, i);
5799 if (data->consider_all_candidates)
5801 for (j = 0; j < n_iv_cands (data); j++)
5803 cand = iv_cand (data, j);
5804 determine_use_iv_cost (data, use, cand);
5807 else
5809 bitmap_iterator bi;
5811 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5813 cand = iv_cand (data, j);
5814 if (!determine_use_iv_cost (data, use, cand))
5815 bitmap_set_bit (to_clear, j);
5818 /* Remove the candidates for which the cost is infinite from
5819 the list of related candidates. */
5820 bitmap_and_compl_into (use->related_cands, to_clear);
5821 bitmap_clear (to_clear);
5825 BITMAP_FREE (to_clear);
5827 if (dump_file && (dump_flags & TDF_DETAILS))
5829 fprintf (dump_file, "Use-candidate costs:\n");
5831 for (i = 0; i < n_iv_uses (data); i++)
5833 use = iv_use (data, i);
5835 fprintf (dump_file, "Use %d:\n", i);
5836 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5837 for (j = 0; j < use->n_map_members; j++)
5839 if (!use->cost_map[j].cand
5840 || infinite_cost_p (use->cost_map[j].cost))
5841 continue;
5843 fprintf (dump_file, " %d\t%d\t%d\t",
5844 use->cost_map[j].cand->id,
5845 use->cost_map[j].cost.cost,
5846 use->cost_map[j].cost.complexity);
5847 if (use->cost_map[j].depends_on)
5848 bitmap_print (dump_file,
5849 use->cost_map[j].depends_on, "","");
5850 if (use->cost_map[j].inv_expr_id != -1)
5851 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5852 fprintf (dump_file, "\n");
5855 fprintf (dump_file, "\n");
5857 fprintf (dump_file, "\n");
5861 /* Determines cost of the candidate CAND. */
5863 static void
5864 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5866 comp_cost cost_base;
5867 unsigned cost, cost_step;
5868 tree base;
5870 if (!cand->iv)
5872 cand->cost = 0;
5873 return;
5876 /* There are two costs associated with the candidate -- its increment
5877 and its initialization. The second is almost negligible for any loop
5878 that rolls enough, so we take it into account just a little. */
5880 base = cand->iv->base;
5881 cost_base = force_var_cost (data, base, NULL);
5882 /* It will be exceptional that the iv register happens to be initialized with
5883 the proper value at no cost. In general, there will at least be a regcopy
5884 or a const set. */
5885 if (cost_base.cost == 0)
5886 cost_base.cost = COSTS_N_INSNS (1);
5887 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5889 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5891 /* Prefer the original ivs unless we may gain something by replacing them.
5892 The reason is to make debugging simpler, so this is not relevant for
5893 artificial ivs created by other optimization passes. */
5894 if (cand->pos != IP_ORIGINAL
5895 || !SSA_NAME_VAR (cand->var_before)
5896 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5897 cost++;
5899 /* Prefer not to insert statements into latch unless there are some
5900 already (so that we do not create unnecessary jumps). */
5901 if (cand->pos == IP_END
5902 && empty_block_p (ip_end_pos (data->current_loop)))
5903 cost++;
5905 cand->cost = cost;
5906 cand->cost_step = cost_step;
5909 /* Determines costs of computation of the candidates. */
5911 static void
5912 determine_iv_costs (struct ivopts_data *data)
5914 unsigned i;
5916 if (dump_file && (dump_flags & TDF_DETAILS))
5918 fprintf (dump_file, "Candidate costs:\n");
5919 fprintf (dump_file, " cand\tcost\n");
5922 for (i = 0; i < n_iv_cands (data); i++)
5924 struct iv_cand *cand = iv_cand (data, i);
5926 determine_iv_cost (data, cand);
5928 if (dump_file && (dump_flags & TDF_DETAILS))
5929 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5932 if (dump_file && (dump_flags & TDF_DETAILS))
5933 fprintf (dump_file, "\n");
5936 /* Calculates cost for having SIZE induction variables. */
5938 static unsigned
5939 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5941 /* We add size to the cost, so that we prefer eliminating ivs
5942 if possible. */
5943 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5944 data->body_includes_call);
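/* E.g. (made-up numbers): with size = 3 and estimate_reg_pressure_cost
   returning 2, the global cost is 3 + 2 = 5; adding SIZE itself means an
   extra iv always costs at least one unit, so smaller sets are preferred.  */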
5947 /* For each size of the induction variable set determine the penalty. */
5949 static void
5950 determine_set_costs (struct ivopts_data *data)
5952 unsigned j, n;
5953 gphi *phi;
5954 gphi_iterator psi;
5955 tree op;
5956 struct loop *loop = data->current_loop;
5957 bitmap_iterator bi;
5959 if (dump_file && (dump_flags & TDF_DETAILS))
5961 fprintf (dump_file, "Global costs:\n");
5962 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5963 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5964 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5965 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5968 n = 0;
5969 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5971 phi = psi.phi ();
5972 op = PHI_RESULT (phi);
5974 if (virtual_operand_p (op))
5975 continue;
5977 if (get_iv (data, op))
5978 continue;
5980 n++;
5983 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5985 struct version_info *info = ver_info (data, j);
5987 if (info->inv_id && info->has_nonlin_use)
5988 n++;
5991 data->regs_used = n;
5992 if (dump_file && (dump_flags & TDF_DETAILS))
5993 fprintf (dump_file, " regs_used %d\n", n);
5995 if (dump_file && (dump_flags & TDF_DETAILS))
5997 fprintf (dump_file, " cost for size:\n");
5998 fprintf (dump_file, " ivs\tcost\n");
5999 for (j = 0; j <= 2 * target_avail_regs; j++)
6000 fprintf (dump_file, " %d\t%d\n", j,
6001 ivopts_global_cost_for_size (data, j));
6002 fprintf (dump_file, "\n");
6006 /* Returns true if A is a cheaper cost pair than B. */
6008 static bool
6009 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6011 int cmp;
6013 if (!a)
6014 return false;
6016 if (!b)
6017 return true;
6019 cmp = compare_costs (a->cost, b->cost);
6020 if (cmp < 0)
6021 return true;
6023 if (cmp > 0)
6024 return false;
6026 /* In case the costs are the same, prefer the cheaper candidate. */
6027 if (a->cand->cost < b->cand->cost)
6028 return true;
6030 return false;
6034 /* Returns the candidate by which USE is expressed in IVS. */
6036 static struct cost_pair *
6037 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
6039 return ivs->cand_for_use[use->id];
6042 /* Computes the cost field of IVS structure. */
6044 static void
6045 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6047 comp_cost cost = ivs->cand_use_cost;
6049 cost.cost += ivs->cand_cost;
6051 cost.cost += ivopts_global_cost_for_size (data,
6052 ivs->n_regs + ivs->num_used_inv_expr);
6054 ivs->cost = cost;
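/* Worked example (illustrative figures): with cand_use_cost = 12,
   cand_cost = 6, n_regs = 3 and one used invariant expression, the set
   costs 12 + 6 + ivopts_global_cost_for_size (data, 4).  */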
6057 /* Remove the invariants in set INVS from set IVS. */
6059 static void
6060 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6062 bitmap_iterator bi;
6063 unsigned iid;
6065 if (!invs)
6066 return;
6068 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6070 ivs->n_invariant_uses[iid]--;
6071 if (ivs->n_invariant_uses[iid] == 0)
6072 ivs->n_regs--;
6076 /* Set USE not to be expressed by any candidate in IVS. */
6078 static void
6079 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6080 struct iv_use *use)
6082 unsigned uid = use->id, cid;
6083 struct cost_pair *cp;
6085 cp = ivs->cand_for_use[uid];
6086 if (!cp)
6087 return;
6088 cid = cp->cand->id;
6090 ivs->bad_uses++;
6091 ivs->cand_for_use[uid] = NULL;
6092 ivs->n_cand_uses[cid]--;
6094 if (ivs->n_cand_uses[cid] == 0)
6096 bitmap_clear_bit (ivs->cands, cid);
6097 /* Do not count the pseudocandidates. */
6098 if (cp->cand->iv)
6099 ivs->n_regs--;
6100 ivs->n_cands--;
6101 ivs->cand_cost -= cp->cand->cost;
6103 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6106 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6108 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6110 if (cp->inv_expr_id != -1)
6112 ivs->used_inv_expr[cp->inv_expr_id]--;
6113 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6114 ivs->num_used_inv_expr--;
6116 iv_ca_recount_cost (data, ivs);
6119 /* Add invariants in set INVS to set IVS. */
6121 static void
6122 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6124 bitmap_iterator bi;
6125 unsigned iid;
6127 if (!invs)
6128 return;
6130 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6132 ivs->n_invariant_uses[iid]++;
6133 if (ivs->n_invariant_uses[iid] == 1)
6134 ivs->n_regs++;
6138 /* Set cost pair for USE in set IVS to CP. */
6140 static void
6141 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6142 struct iv_use *use, struct cost_pair *cp)
6144 unsigned uid = use->id, cid;
6146 if (ivs->cand_for_use[uid] == cp)
6147 return;
6149 if (ivs->cand_for_use[uid])
6150 iv_ca_set_no_cp (data, ivs, use);
6152 if (cp)
6154 cid = cp->cand->id;
6156 ivs->bad_uses--;
6157 ivs->cand_for_use[uid] = cp;
6158 ivs->n_cand_uses[cid]++;
6159 if (ivs->n_cand_uses[cid] == 1)
6161 bitmap_set_bit (ivs->cands, cid);
6162 /* Do not count the pseudocandidates. */
6163 if (cp->cand->iv)
6164 ivs->n_regs++;
6165 ivs->n_cands++;
6166 ivs->cand_cost += cp->cand->cost;
6168 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6171 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6172 iv_ca_set_add_invariants (ivs, cp->depends_on);
6174 if (cp->inv_expr_id != -1)
6176 ivs->used_inv_expr[cp->inv_expr_id]++;
6177 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6178 ivs->num_used_inv_expr++;
6180 iv_ca_recount_cost (data, ivs);
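
/* A bookkeeping sketch for iv_ca_set_cp/iv_ca_set_no_cp, with
   hypothetical numbers: expressing use 3 by a previously unused
   candidate 7 sets bit 7 in ivs->cands, increments n_cands and (for a
   real, non-pseudo candidate) n_regs, adds cand->cost to cand_cost and
   folds cp->cost into cand_use_cost; iv_ca_set_no_cp undoes each of
   those steps, so the pair can be used to try an assignment and then
   roll it back.  */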
6184 /* Extend set IVS by expressing USE by some of the candidates in it
6185 if possible. Consider all important candidates if candidates in
6186 set IVS don't give any result. */
6188 static void
6189 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6190 struct iv_use *use)
6192 struct cost_pair *best_cp = NULL, *cp;
6193 bitmap_iterator bi;
6194 unsigned i;
6195 struct iv_cand *cand;
6197 gcc_assert (ivs->upto >= use->id);
6198 ivs->upto++;
6199 ivs->bad_uses++;
6201 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6203 cand = iv_cand (data, i);
6204 cp = get_use_iv_cost (data, use, cand);
6205 if (cheaper_cost_pair (cp, best_cp))
6206 best_cp = cp;
6209 if (best_cp == NULL)
6211 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6213 cand = iv_cand (data, i);
6214 cp = get_use_iv_cost (data, use, cand);
6215 if (cheaper_cost_pair (cp, best_cp))
6216 best_cp = cp;
6220 iv_ca_set_cp (data, ivs, use, best_cp);
6223 /* Get cost for assignment IVS. */
6225 static comp_cost
6226 iv_ca_cost (struct iv_ca *ivs)
6228 /* This was a conditional expression but it triggered a bug in
6229 Sun C 5.5. */
6230 if (ivs->bad_uses)
6231 return infinite_cost;
6232 else
6233 return ivs->cost;
6236 /* Returns true if all dependences of CP are among invariants in IVS. */
6238 static bool
6239 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6241 unsigned i;
6242 bitmap_iterator bi;
6244 if (!cp->depends_on)
6245 return true;
6247 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6249 if (ivs->n_invariant_uses[i] == 0)
6250 return false;
6253 return true;
6256 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
6257 chains it before NEXT_CHANGE. */
6259 static struct iv_ca_delta *
6260 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6261 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6263 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6265 change->use = use;
6266 change->old_cp = old_cp;
6267 change->new_cp = new_cp;
6268 change->next_change = next_change;
6270 return change;
6273 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6274 are rewritten. */
6276 static struct iv_ca_delta *
6277 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6279 struct iv_ca_delta *last;
6281 if (!l2)
6282 return l1;
6284 if (!l1)
6285 return l2;
6287 for (last = l1; last->next_change; last = last->next_change)
6288 continue;
6289 last->next_change = l2;
6291 return l1;
6294 /* Reverses the list of changes DELTA, forming its inverse. */
6296 static struct iv_ca_delta *
6297 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6299 struct iv_ca_delta *act, *next, *prev = NULL;
6301 for (act = delta; act; act = next)
6303 next = act->next_change;
6304 act->next_change = prev;
6305 prev = act;
6307 std::swap (act->old_cp, act->new_cp);
6310 return prev;
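
/* A minimal standalone sketch of the reversal above, with a hypothetical
   stand-in for iv_ca_delta: reversing [A -> B -> C] yields [C -> B -> A]
   with each node's old/new payload swapped, so replaying the result
   undoes the original list of changes.  */
#if 0
struct delta { int old_cp, new_cp; struct delta *next; };

static struct delta *
reverse (struct delta *d)
{
  struct delta *next, *prev = 0;
  for (; d; d = next)
    {
      next = d->next;
      d->next = prev;
      prev = d;
      int tmp = d->old_cp;	/* invert the change itself */
      d->old_cp = d->new_cp;
      d->new_cp = tmp;
    }
  return prev;
}
#endif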
6313 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6314 reverted instead. */
6316 static void
6317 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6318 struct iv_ca_delta *delta, bool forward)
6320 struct cost_pair *from, *to;
6321 struct iv_ca_delta *act;
6323 if (!forward)
6324 delta = iv_ca_delta_reverse (delta);
6326 for (act = delta; act; act = act->next_change)
6328 from = act->old_cp;
6329 to = act->new_cp;
6330 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6331 iv_ca_set_cp (data, ivs, act->use, to);
6334 if (!forward)
6335 iv_ca_delta_reverse (delta);
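
/* Usage note: the callers below follow a common trial pattern -- commit
   a delta, sample the cost of the modified assignment, then commit the
   same delta with FORWARD == false to restore the previous state:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   so candidate changes can be evaluated without copying the set.  */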
6338 /* Returns true if CAND is used in IVS. */
6340 static bool
6341 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6343 return ivs->n_cand_uses[cand->id] > 0;
6346 /* Returns number of induction variable candidates in the set IVS. */
6348 static unsigned
6349 iv_ca_n_cands (struct iv_ca *ivs)
6351 return ivs->n_cands;
6354 /* Free the list of changes DELTA. */
6356 static void
6357 iv_ca_delta_free (struct iv_ca_delta **delta)
6359 struct iv_ca_delta *act, *next;
6361 for (act = *delta; act; act = next)
6363 next = act->next_change;
6364 free (act);
6367 *delta = NULL;
6370 /* Allocates a new assignment of iv candidates to uses. */
6372 static struct iv_ca *
6373 iv_ca_new (struct ivopts_data *data)
6375 struct iv_ca *nw = XNEW (struct iv_ca);
6377 nw->upto = 0;
6378 nw->bad_uses = 0;
6379 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6380 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6381 nw->cands = BITMAP_ALLOC (NULL);
6382 nw->n_cands = 0;
6383 nw->n_regs = 0;
6384 nw->cand_use_cost = no_cost;
6385 nw->cand_cost = 0;
6386 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6387 nw->cost = no_cost;
6388 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6389 nw->num_used_inv_expr = 0;
6391 return nw;
6394 /* Free memory occupied by the set IVS. */
6396 static void
6397 iv_ca_free (struct iv_ca **ivs)
6399 free ((*ivs)->cand_for_use);
6400 free ((*ivs)->n_cand_uses);
6401 BITMAP_FREE ((*ivs)->cands);
6402 free ((*ivs)->n_invariant_uses);
6403 free ((*ivs)->used_inv_expr);
6404 free (*ivs);
6405 *ivs = NULL;
6408 /* Dumps IVS to FILE. */
6410 static void
6411 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6413 const char *pref = " invariants ";
6414 unsigned i;
6415 comp_cost cost = iv_ca_cost (ivs);
6417 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6418 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6419 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6420 bitmap_print (file, ivs->cands, " candidates: ","\n");
6422 for (i = 0; i < ivs->upto; i++)
6424 struct iv_use *use = iv_use (data, i);
6425 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6426 if (cp)
6427 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6428 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6429 else
6430 fprintf (file, " use:%d --> ??\n", use->id);
6433 for (i = 1; i <= data->max_inv_id; i++)
6434 if (ivs->n_invariant_uses[i])
6436 fprintf (file, "%s%d", pref, i);
6437 pref = ", ";
6439 fprintf (file, "\n\n");
6442 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6443 new set, and store differences in DELTA. Number of induction variables
6444 in the new set is stored to N_IVS. When the flag MIN_NCAND is true,
6445 the function tries to find a solution with minimal iv candidates. */
6447 static comp_cost
6448 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6449 struct iv_cand *cand, struct iv_ca_delta **delta,
6450 unsigned *n_ivs, bool min_ncand)
6452 unsigned i;
6453 comp_cost cost;
6454 struct iv_use *use;
6455 struct cost_pair *old_cp, *new_cp;
6457 *delta = NULL;
6458 for (i = 0; i < ivs->upto; i++)
6460 use = iv_use (data, i);
6461 old_cp = iv_ca_cand_for_use (ivs, use);
6463 if (old_cp
6464 && old_cp->cand == cand)
6465 continue;
6467 new_cp = get_use_iv_cost (data, use, cand);
6468 if (!new_cp)
6469 continue;
6471 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6472 continue;
6474 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6475 continue;
6477 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6480 iv_ca_delta_commit (data, ivs, *delta, true);
6481 cost = iv_ca_cost (ivs);
6482 if (n_ivs)
6483 *n_ivs = iv_ca_n_cands (ivs);
6484 iv_ca_delta_commit (data, ivs, *delta, false);
6486 return cost;
6489 /* Try narrowing set IVS by removing CAND. Return the cost of
6490 the new set and store the differences in DELTA. START is
6491 the candidate with which we start narrowing. */
6493 static comp_cost
6494 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6495 struct iv_cand *cand, struct iv_cand *start,
6496 struct iv_ca_delta **delta)
6498 unsigned i, ci;
6499 struct iv_use *use;
6500 struct cost_pair *old_cp, *new_cp, *cp;
6501 bitmap_iterator bi;
6502 struct iv_cand *cnd;
6503 comp_cost cost, best_cost, acost;
6505 *delta = NULL;
6506 for (i = 0; i < n_iv_uses (data); i++)
6508 use = iv_use (data, i);
6510 old_cp = iv_ca_cand_for_use (ivs, use);
6511 if (old_cp->cand != cand)
6512 continue;
6514 best_cost = iv_ca_cost (ivs);
6515 /* Start narrowing with START. */
6516 new_cp = get_use_iv_cost (data, use, start);
6518 if (data->consider_all_candidates)
6520 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6522 if (ci == cand->id || (start && ci == start->id))
6523 continue;
6525 cnd = iv_cand (data, ci);
6527 cp = get_use_iv_cost (data, use, cnd);
6528 if (!cp)
6529 continue;
6531 iv_ca_set_cp (data, ivs, use, cp);
6532 acost = iv_ca_cost (ivs);
6534 if (compare_costs (acost, best_cost) < 0)
6536 best_cost = acost;
6537 new_cp = cp;
6541 else
6543 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6545 if (ci == cand->id || (start && ci == start->id))
6546 continue;
6548 cnd = iv_cand (data, ci);
6550 cp = get_use_iv_cost (data, use, cnd);
6551 if (!cp)
6552 continue;
6554 iv_ca_set_cp (data, ivs, use, cp);
6555 acost = iv_ca_cost (ivs);
6557 if (compare_costs (acost, best_cost) < 0)
6559 best_cost = acost;
6560 new_cp = cp;
6564 /* Restore the old cp for this use. */
6565 iv_ca_set_cp (data, ivs, use, old_cp);
6567 if (!new_cp)
6569 iv_ca_delta_free (delta);
6570 return infinite_cost;
6573 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6576 iv_ca_delta_commit (data, ivs, *delta, true);
6577 cost = iv_ca_cost (ivs);
6578 iv_ca_delta_commit (data, ivs, *delta, false);
6580 return cost;
6583 /* Try optimizing the set of candidates IVS by removing candidates other
6584 than EXCEPT_CAND from it. Return cost of the new set, and store
6585 differences in DELTA. */
6587 static comp_cost
6588 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6589 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6591 bitmap_iterator bi;
6592 struct iv_ca_delta *act_delta, *best_delta;
6593 unsigned i;
6594 comp_cost best_cost, acost;
6595 struct iv_cand *cand;
6597 best_delta = NULL;
6598 best_cost = iv_ca_cost (ivs);
6600 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6602 cand = iv_cand (data, i);
6604 if (cand == except_cand)
6605 continue;
6607 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6609 if (compare_costs (acost, best_cost) < 0)
6611 best_cost = acost;
6612 iv_ca_delta_free (&best_delta);
6613 best_delta = act_delta;
6615 else
6616 iv_ca_delta_free (&act_delta);
6619 if (!best_delta)
6621 *delta = NULL;
6622 return best_cost;
6625 /* Recurse to possibly remove other unnecessary ivs. */
6626 iv_ca_delta_commit (data, ivs, best_delta, true);
6627 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6628 iv_ca_delta_commit (data, ivs, best_delta, false);
6629 *delta = iv_ca_delta_join (best_delta, *delta);
6630 return best_cost;
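
/* A hypothetical trace of the pruning above: from a set {c0, c1, c2}
   with cost 20, narrowing away c1 might give cost 18; that delta is
   committed and the function recurses on {c0, c2}.  If no further
   removal helps, the joined delta describes exactly "drop c1", 18 is
   returned, and IVS itself is left unchanged.  */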
6633 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6634 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6635 its cost_pair; otherwise just return BEST_CP. */
6637 static struct cost_pair*
6638 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6639 unsigned int cand_idx, struct iv_cand *old_cand,
6640 struct cost_pair *best_cp)
6642 struct iv_cand *cand;
6643 struct cost_pair *cp;
6645 gcc_assert (old_cand != NULL && best_cp != NULL);
6646 if (cand_idx == old_cand->id)
6647 return best_cp;
6649 cand = iv_cand (data, cand_idx);
6650 cp = get_use_iv_cost (data, use, cand);
6651 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6652 return cp;
6654 return best_cp;
6657 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6658 that are used by more than one iv use. For each of those candidates,
6659 this function tries to represent the iv uses under that candidate using
6660 other candidates with lower local cost, then tries to prune the new set.
6661 If the new set has lower cost, it returns the new cost after recording
6662 the candidate replacements in list DELTA. */
6664 static comp_cost
6665 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6666 struct iv_ca_delta **delta)
6668 bitmap_iterator bi, bj;
6669 unsigned int i, j, k;
6670 struct iv_use *use;
6671 struct iv_cand *cand;
6672 comp_cost orig_cost, acost;
6673 struct iv_ca_delta *act_delta, *tmp_delta;
6674 struct cost_pair *old_cp, *best_cp = NULL;
6676 *delta = NULL;
6677 orig_cost = iv_ca_cost (ivs);
6679 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6681 if (ivs->n_cand_uses[i] == 1
6682 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6683 continue;
6685 cand = iv_cand (data, i);
6687 act_delta = NULL;
6688 /* Represent uses under current candidate using other ones with
6689 lower local cost. */
6690 for (j = 0; j < ivs->upto; j++)
6692 use = iv_use (data, j);
6693 old_cp = iv_ca_cand_for_use (ivs, use);
6695 if (old_cp->cand != cand)
6696 continue;
6698 best_cp = old_cp;
6699 if (data->consider_all_candidates)
6700 for (k = 0; k < n_iv_cands (data); k++)
6701 best_cp = cheaper_cost_with_cand (data, use, k,
6702 old_cp->cand, best_cp);
6703 else
6704 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6705 best_cp = cheaper_cost_with_cand (data, use, k,
6706 old_cp->cand, best_cp);
6708 if (best_cp == old_cp)
6709 continue;
6711 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6713 /* No replacement was found; no need for further pruning. */
6714 if (!act_delta)
6715 continue;
6717 /* Prune the new candidate set. */
6718 iv_ca_delta_commit (data, ivs, act_delta, true);
6719 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6720 iv_ca_delta_commit (data, ivs, act_delta, false);
6721 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6723 if (compare_costs (acost, orig_cost) < 0)
6725 *delta = act_delta;
6726 return acost;
6728 else
6729 iv_ca_delta_free (&act_delta);
6732 return orig_cost;
6735 /* Tries to extend the set IVS in the best possible way in order
6736 to express USE. If ORIGINALP is true, prefer candidates from
6737 the original set of IVs, otherwise favor important candidates not
6738 based on any memory object. */
6740 static bool
6741 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6742 struct iv_use *use, bool originalp)
6744 comp_cost best_cost, act_cost;
6745 unsigned i;
6746 bitmap_iterator bi;
6747 struct iv_cand *cand;
6748 struct iv_ca_delta *best_delta = NULL, *act_delta;
6749 struct cost_pair *cp;
6751 iv_ca_add_use (data, ivs, use);
6752 best_cost = iv_ca_cost (ivs);
6753 cp = iv_ca_cand_for_use (ivs, use);
6754 if (cp)
6756 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6757 iv_ca_set_no_cp (data, ivs, use);
6760 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6761 first try important candidates not based on any memory object. Only if
6762 this fails, try the specific ones. Rationale -- in loops with many
6763 variables the best choice often is to use just one generic biv. If we
6764 added here many ivs specific to the uses, the optimization algorithm later
6765 would be likely to get stuck in a local minimum, thus causing us to create
6766 too many ivs. Going from few ivs to more seems more likely to be
6767 successful -- starting from few ivs, replacing an expensive use by a
6768 specific iv should always be a win. */
6769 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6771 cand = iv_cand (data, i);
6773 if (originalp && cand->pos != IP_ORIGINAL)
6774 continue;
6776 if (!originalp && cand->iv->base_object != NULL_TREE)
6777 continue;
6779 if (iv_ca_cand_used_p (ivs, cand))
6780 continue;
6782 cp = get_use_iv_cost (data, use, cand);
6783 if (!cp)
6784 continue;
6786 iv_ca_set_cp (data, ivs, use, cp);
6787 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6788 true);
6789 iv_ca_set_no_cp (data, ivs, use);
6790 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6792 if (compare_costs (act_cost, best_cost) < 0)
6794 best_cost = act_cost;
6796 iv_ca_delta_free (&best_delta);
6797 best_delta = act_delta;
6799 else
6800 iv_ca_delta_free (&act_delta);
6803 if (infinite_cost_p (best_cost))
6805 for (i = 0; i < use->n_map_members; i++)
6807 cp = use->cost_map + i;
6808 cand = cp->cand;
6809 if (!cand)
6810 continue;
6812 /* Already tried this. */
6813 if (cand->important)
6815 if (originalp && cand->pos == IP_ORIGINAL)
6816 continue;
6817 if (!originalp && cand->iv->base_object == NULL_TREE)
6818 continue;
6821 if (iv_ca_cand_used_p (ivs, cand))
6822 continue;
6824 act_delta = NULL;
6825 iv_ca_set_cp (data, ivs, use, cp);
6826 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6827 iv_ca_set_no_cp (data, ivs, use);
6828 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6829 cp, act_delta);
6831 if (compare_costs (act_cost, best_cost) < 0)
6833 best_cost = act_cost;
6835 if (best_delta)
6836 iv_ca_delta_free (&best_delta);
6837 best_delta = act_delta;
6839 else
6840 iv_ca_delta_free (&act_delta);
6844 iv_ca_delta_commit (data, ivs, best_delta, true);
6845 iv_ca_delta_free (&best_delta);
6847 return !infinite_cost_p (best_cost);
6850 /* Finds an initial assignment of candidates to uses. */
6852 static struct iv_ca *
6853 get_initial_solution (struct ivopts_data *data, bool originalp)
6855 struct iv_ca *ivs = iv_ca_new (data);
6856 unsigned i;
6858 for (i = 0; i < n_iv_uses (data); i++)
6859 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6861 iv_ca_free (&ivs);
6862 return NULL;
6865 return ivs;
6868 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6869 points to a bool; if it is true, this function also tries to break the
6870 local optimal fixed-point by replacing candidates in IVS. */
6872 static bool
6873 try_improve_iv_set (struct ivopts_data *data,
6874 struct iv_ca *ivs, bool *try_replace_p)
6876 unsigned i, n_ivs;
6877 comp_cost acost, best_cost = iv_ca_cost (ivs);
6878 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6879 struct iv_cand *cand;
6881 /* Try extending the set of induction variables by one. */
6882 for (i = 0; i < n_iv_cands (data); i++)
6884 cand = iv_cand (data, i);
6886 if (iv_ca_cand_used_p (ivs, cand))
6887 continue;
6889 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6890 if (!act_delta)
6891 continue;
6893 /* If we successfully added the candidate and the set is small enough,
6894 try optimizing it by removing other candidates. */
6895 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6897 iv_ca_delta_commit (data, ivs, act_delta, true);
6898 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6899 iv_ca_delta_commit (data, ivs, act_delta, false);
6900 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6903 if (compare_costs (acost, best_cost) < 0)
6905 best_cost = acost;
6906 iv_ca_delta_free (&best_delta);
6907 best_delta = act_delta;
6909 else
6910 iv_ca_delta_free (&act_delta);
6913 if (!best_delta)
6915 /* Try removing the candidates from the set instead. */
6916 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6918 if (!best_delta && *try_replace_p)
6920 *try_replace_p = false;
6921 /* So far the candidate-selection algorithm tends to choose fewer IVs
6922 so that it can handle cases in which loops have many variables
6923 but the best choice is often to use only one general biv. One
6924 weakness is that it can't handle the opposite case, in which different
6925 candidates should be chosen with respect to each use. To solve
6926 the problem, we replace candidates in the manner described in the
6927 comments of iv_ca_replace, thus giving the general algorithm a chance
6928 to break the local optimal fixed-point in these cases. */
6929 best_cost = iv_ca_replace (data, ivs, &best_delta);
6932 if (!best_delta)
6933 return false;
6936 iv_ca_delta_commit (data, ivs, best_delta, true);
6937 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6938 iv_ca_delta_free (&best_delta);
6939 return true;
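
/* In summary, each successful call above performs one greedy step --
   first extending the set by a single candidate (pruning afterwards if
   the set is small enough), then pure pruning, and as a last resort one
   round of candidate replacement -- and commits it only if it strictly
   lowers iv_ca_cost.  Since every accepted step lowers the integer
   cost, the driver loop below terminates.  */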
6942 /* Attempts to find the optimal set of induction variables. We use a simple
6943 greedy heuristic -- we try to replace at most one candidate in the selected
6944 solution and remove the unused ivs while this improves the cost. */
6946 static struct iv_ca *
6947 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6949 struct iv_ca *set;
6950 bool try_replace_p = true;
6952 /* Get the initial solution. */
6953 set = get_initial_solution (data, originalp);
6954 if (!set)
6956 if (dump_file && (dump_flags & TDF_DETAILS))
6957 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6958 return NULL;
6961 if (dump_file && (dump_flags & TDF_DETAILS))
6963 fprintf (dump_file, "Initial set of candidates:\n");
6964 iv_ca_dump (data, dump_file, set);
6967 while (try_improve_iv_set (data, set, &try_replace_p))
6969 if (dump_file && (dump_flags & TDF_DETAILS))
6971 fprintf (dump_file, "Improved to:\n");
6972 iv_ca_dump (data, dump_file, set);
6976 return set;
6979 static struct iv_ca *
6980 find_optimal_iv_set (struct ivopts_data *data)
6982 unsigned i;
6983 struct iv_ca *set, *origset;
6984 struct iv_use *use;
6985 comp_cost cost, origcost;
6987 /* Determine the cost based on a strategy that starts with original IVs,
6988 and try again using a strategy that prefers candidates not based
6989 on any IVs. */
6990 origset = find_optimal_iv_set_1 (data, true);
6991 set = find_optimal_iv_set_1 (data, false);
6993 if (!origset && !set)
6994 return NULL;
6996 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6997 cost = set ? iv_ca_cost (set) : infinite_cost;
6999 if (dump_file && (dump_flags & TDF_DETAILS))
7001 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7002 origcost.cost, origcost.complexity);
7003 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7004 cost.cost, cost.complexity);
7007 /* Choose the one with the best cost. */
7008 if (compare_costs (origcost, cost) <= 0)
7010 if (set)
7011 iv_ca_free (&set);
7012 set = origset;
7014 else if (origset)
7015 iv_ca_free (&origset);
7017 for (i = 0; i < n_iv_uses (data); i++)
7019 use = iv_use (data, i);
7020 use->selected = iv_ca_cand_for_use (set, use)->cand;
7023 return set;
7026 /* Creates a new induction variable corresponding to CAND. */
7028 static void
7029 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7031 gimple_stmt_iterator incr_pos;
7032 tree base;
7033 bool after = false;
7035 if (!cand->iv)
7036 return;
7038 switch (cand->pos)
7040 case IP_NORMAL:
7041 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7042 break;
7044 case IP_END:
7045 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7046 after = true;
7047 break;
7049 case IP_AFTER_USE:
7050 after = true;
7051 /* fall through */
7052 case IP_BEFORE_USE:
7053 incr_pos = gsi_for_stmt (cand->incremented_at);
7054 break;
7056 case IP_ORIGINAL:
7057 /* Mark that the iv is preserved. */
7058 name_info (data, cand->var_before)->preserve_biv = true;
7059 name_info (data, cand->var_after)->preserve_biv = true;
7061 /* Rewrite the increment so that it uses var_before directly. */
7062 find_interesting_uses_op (data, cand->var_after)->selected = cand;
7063 return;
7066 gimple_add_tmp_var (cand->var_before);
7068 base = unshare_expr (cand->iv->base);
7070 create_iv (base, unshare_expr (cand->iv->step),
7071 cand->var_before, data->current_loop,
7072 &incr_pos, after, &cand->var_before, &cand->var_after);
7075 /* Creates new induction variables described in SET. */
7077 static void
7078 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7080 unsigned i;
7081 struct iv_cand *cand;
7082 bitmap_iterator bi;
7084 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7086 cand = iv_cand (data, i);
7087 create_new_iv (data, cand);
7090 if (dump_file && (dump_flags & TDF_DETAILS))
7092 fprintf (dump_file, "Selected IV set for loop %d",
7093 data->current_loop->num);
7094 if (data->loop_loc != UNKNOWN_LOCATION)
7095 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7096 LOCATION_LINE (data->loop_loc));
7097 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7098 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7100 cand = iv_cand (data, i);
7101 dump_cand (dump_file, cand);
7103 fprintf (dump_file, "\n");
7107 /* Rewrites USE (definition of iv used in a nonlinear expression)
7108 using candidate CAND. */
7110 static void
7111 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7112 struct iv_use *use, struct iv_cand *cand)
7114 tree comp;
7115 tree op, tgt;
7116 gassign *ass;
7117 gimple_stmt_iterator bsi;
7119 /* An important special case -- if we are asked to express value of
7120 the original iv by itself, just exit; there is no need to
7121 introduce a new computation (that might also need casting the
7122 variable to unsigned and back). */
7123 if (cand->pos == IP_ORIGINAL
7124 && cand->incremented_at == use->stmt)
7126 enum tree_code stmt_code;
7128 gcc_assert (is_gimple_assign (use->stmt));
7129 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7131 /* Check whether we may leave the computation unchanged.
7132 This is the case only if it does not rely on other
7133 computations in the loop -- otherwise, the computation
7134 we rely upon may be removed in remove_unused_ivs,
7135 thus leading to an ICE. */
7136 stmt_code = gimple_assign_rhs_code (use->stmt);
7137 if (stmt_code == PLUS_EXPR
7138 || stmt_code == MINUS_EXPR
7139 || stmt_code == POINTER_PLUS_EXPR)
7141 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7142 op = gimple_assign_rhs2 (use->stmt);
7143 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7144 op = gimple_assign_rhs1 (use->stmt);
7145 else
7146 op = NULL_TREE;
7148 else
7149 op = NULL_TREE;
7151 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7152 return;
7155 comp = get_computation (data->current_loop, use, cand);
7156 gcc_assert (comp != NULL_TREE);
7158 switch (gimple_code (use->stmt))
7160 case GIMPLE_PHI:
7161 tgt = PHI_RESULT (use->stmt);
7163 /* If we should keep the biv, do not replace it. */
7164 if (name_info (data, tgt)->preserve_biv)
7165 return;
7167 bsi = gsi_after_labels (gimple_bb (use->stmt));
7168 break;
7170 case GIMPLE_ASSIGN:
7171 tgt = gimple_assign_lhs (use->stmt);
7172 bsi = gsi_for_stmt (use->stmt);
7173 break;
7175 default:
7176 gcc_unreachable ();
7179 if (!valid_gimple_rhs_p (comp)
7180 || (gimple_code (use->stmt) != GIMPLE_PHI
7181 /* We can't allow re-allocating the stmt, as it might still be
7182 pointed to. */
7183 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7184 >= gimple_num_ops (gsi_stmt (bsi)))))
7186 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7187 true, GSI_SAME_STMT);
7188 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7190 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7191 /* As this isn't a plain copy we have to reset alignment
7192 information. */
7193 if (SSA_NAME_PTR_INFO (comp))
7194 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7198 if (gimple_code (use->stmt) == GIMPLE_PHI)
7200 ass = gimple_build_assign (tgt, comp);
7201 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7203 bsi = gsi_for_stmt (use->stmt);
7204 remove_phi_node (&bsi, false);
7206 else
7208 gimple_assign_set_rhs_from_tree (&bsi, comp);
7209 use->stmt = gsi_stmt (bsi);
7213 /* Performs a peephole optimization to reorder the iv update statement with
7214 a mem ref to enable instruction combining in later phases. The mem ref uses
7215 the iv value before the update, so the reordering transformation requires
7216 adjustment of the offset. CAND is the selected IV_CAND.
7218 Example:
7220 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7221 iv2 = iv1 + 1;
7223 if (t < val) (1)
7224 goto L;
7225 goto Head;
7228 Directly propagating t over to (1) would introduce an overlapping live
7229 range and thus increase register pressure. This peephole transforms it into:
7232 iv2 = iv1 + 1;
7233 t = MEM_REF (base, iv2, 8, 8);
7234 if (t < val)
7235 goto L;
7236 goto Head;
7239 static void
7240 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7242 tree var_after;
7243 gimple *iv_update, *stmt;
7244 basic_block bb;
7245 gimple_stmt_iterator gsi, gsi_iv;
7247 if (cand->pos != IP_NORMAL)
7248 return;
7250 var_after = cand->var_after;
7251 iv_update = SSA_NAME_DEF_STMT (var_after);
7253 bb = gimple_bb (iv_update);
7254 gsi = gsi_last_nondebug_bb (bb);
7255 stmt = gsi_stmt (gsi);
7257 /* Only handle conditional statements for now. */
7258 if (gimple_code (stmt) != GIMPLE_COND)
7259 return;
7261 gsi_prev_nondebug (&gsi);
7262 stmt = gsi_stmt (gsi);
7263 if (stmt != iv_update)
7264 return;
7266 gsi_prev_nondebug (&gsi);
7267 if (gsi_end_p (gsi))
7268 return;
7270 stmt = gsi_stmt (gsi);
7271 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7272 return;
7274 if (stmt != use->stmt)
7275 return;
7277 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7278 return;
7280 if (dump_file && (dump_flags & TDF_DETAILS))
7282 fprintf (dump_file, "Reordering \n");
7283 print_gimple_stmt (dump_file, iv_update, 0, 0);
7284 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7285 fprintf (dump_file, "\n");
7288 gsi = gsi_for_stmt (use->stmt);
7289 gsi_iv = gsi_for_stmt (iv_update);
7290 gsi_move_before (&gsi_iv, &gsi);
7292 cand->pos = IP_BEFORE_USE;
7293 cand->incremented_at = use->stmt;
7296 /* Rewrites USE (address that is an iv) using candidate CAND. */
7298 static void
7299 rewrite_use_address_1 (struct ivopts_data *data,
7300 struct iv_use *use, struct iv_cand *cand)
7302 aff_tree aff;
7303 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7304 tree base_hint = NULL_TREE;
7305 tree ref, iv;
7306 bool ok;
7308 adjust_iv_update_pos (cand, use);
7309 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7310 gcc_assert (ok);
7311 unshare_aff_combination (&aff);
7313 /* To avoid undefined overflow problems, all IV candidates use unsigned
7314 integer types. The drawback is that this makes it impossible for
7315 create_mem_ref to distinguish an IV that is based on a memory object
7316 from one that represents simply an offset.
7318 To work around this problem, we pass a hint to create_mem_ref that
7319 indicates which variable (if any) in aff is an IV based on a memory
7320 object. Note that we only consider the candidate. If this is not
7321 based on an object, the base of the reference is in some subexpression
7322 of the use -- but these will use pointer types, so they are recognized
7323 by the create_mem_ref heuristics anyway. */
7324 if (cand->iv->base_object)
7325 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7327 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7328 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7329 reference_alias_ptr_type (*use->op_p),
7330 iv, base_hint, data->speed);
7331 copy_ref_info (ref, *use->op_p);
7332 *use->op_p = ref;
7335 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7336 first use of a group, rewrites sub uses in the group too. */
7338 static void
7339 rewrite_use_address (struct ivopts_data *data,
7340 struct iv_use *use, struct iv_cand *cand)
7342 struct iv_use *next;
7344 gcc_assert (use->sub_id == 0);
7345 rewrite_use_address_1 (data, use, cand);
7346 update_stmt (use->stmt);
7348 for (next = use->next; next != NULL; next = next->next)
7350 rewrite_use_address_1 (data, next, cand);
7351 update_stmt (next->stmt);
7354 return;
7357 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7358 candidate CAND. */
7360 static void
7361 rewrite_use_compare (struct ivopts_data *data,
7362 struct iv_use *use, struct iv_cand *cand)
7364 tree comp, *var_p, op, bound;
7365 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7366 enum tree_code compare;
7367 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7368 bool ok;
7370 bound = cp->value;
7371 if (bound)
7373 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7374 tree var_type = TREE_TYPE (var);
7375 gimple_seq stmts;
7377 if (dump_file && (dump_flags & TDF_DETAILS))
7379 fprintf (dump_file, "Replacing exit test: ");
7380 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7382 compare = cp->comp;
7383 bound = unshare_expr (fold_convert (var_type, bound));
7384 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7385 if (stmts)
7386 gsi_insert_seq_on_edge_immediate (
7387 loop_preheader_edge (data->current_loop),
7388 stmts);
7390 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7391 gimple_cond_set_lhs (cond_stmt, var);
7392 gimple_cond_set_code (cond_stmt, compare);
7393 gimple_cond_set_rhs (cond_stmt, op);
7394 return;
7397 /* The induction variable elimination failed; just express the original
7398 giv. */
7399 comp = get_computation (data->current_loop, use, cand);
7400 gcc_assert (comp != NULL_TREE);
7402 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7403 gcc_assert (ok);
7405 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7406 true, GSI_SAME_STMT);
7409 /* Rewrites USE using candidate CAND. */
7411 static void
7412 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7414 switch (use->type)
7416 case USE_NONLINEAR_EXPR:
7417 rewrite_use_nonlinear_expr (data, use, cand);
7418 break;
7420 case USE_ADDRESS:
7421 rewrite_use_address (data, use, cand);
7422 break;
7424 case USE_COMPARE:
7425 rewrite_use_compare (data, use, cand);
7426 break;
7428 default:
7429 gcc_unreachable ();
7432 update_stmt (use->stmt);
7435 /* Rewrite the uses using the selected induction variables. */
7437 static void
7438 rewrite_uses (struct ivopts_data *data)
7440 unsigned i;
7441 struct iv_cand *cand;
7442 struct iv_use *use;
7444 for (i = 0; i < n_iv_uses (data); i++)
7446 use = iv_use (data, i);
7447 cand = use->selected;
7448 gcc_assert (cand);
7450 rewrite_use (data, use, cand);
7454 /* Removes the ivs that are not used after rewriting. */
7456 static void
7457 remove_unused_ivs (struct ivopts_data *data)
7459 unsigned j;
7460 bitmap_iterator bi;
7461 bitmap toremove = BITMAP_ALLOC (NULL);
7463 /* Figure out an order in which to release SSA DEFs so that we don't
7464 release something that we'd have to propagate into a debug stmt
7465 afterwards. */
7466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7468 struct version_info *info;
7470 info = ver_info (data, j);
7471 if (info->iv
7472 && !integer_zerop (info->iv->step)
7473 && !info->inv_id
7474 && !info->iv->have_use_for
7475 && !info->preserve_biv)
7477 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7479 tree def = info->iv->ssa_name;
7481 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7483 imm_use_iterator imm_iter;
7484 use_operand_p use_p;
7485 gimple *stmt;
7486 int count = 0;
7488 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7490 if (!gimple_debug_bind_p (stmt))
7491 continue;
7493 /* We just want to determine whether to do nothing
7494 (count == 0), to substitute the computed
7495 expression into a single use of the SSA DEF by
7496 itself (count == 1), or to use a debug temp
7497 because the SSA DEF is used multiple times or as
7498 part of a larger expression (count > 1). */
7499 count++;
7500 if (gimple_debug_bind_get_value (stmt) != def)
7501 count++;
7503 if (count > 1)
7504 BREAK_FROM_IMM_USE_STMT (imm_iter);
7507 if (!count)
7508 continue;
7510 struct iv_use dummy_use;
7511 struct iv_cand *best_cand = NULL, *cand;
7512 unsigned i, best_pref = 0, cand_pref;
7514 memset (&dummy_use, 0, sizeof (dummy_use));
7515 dummy_use.iv = info->iv;
7516 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7518 cand = iv_use (data, i)->selected;
7519 if (cand == best_cand)
7520 continue;
7521 cand_pref = operand_equal_p (cand->iv->step,
7522 info->iv->step, 0)
7523 ? 4 : 0;
7524 cand_pref
7525 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7526 == TYPE_MODE (TREE_TYPE (info->iv->base))
7527 ? 2 : 0;
7528 cand_pref
7529 += TREE_CODE (cand->iv->base) == INTEGER_CST
7530 ? 1 : 0;
7531 if (best_cand == NULL || best_pref < cand_pref)
7533 best_cand = cand;
7534 best_pref = cand_pref;
7538 if (!best_cand)
7539 continue;
7541 tree comp = get_computation_at (data->current_loop,
7542 &dummy_use, best_cand,
7543 SSA_NAME_DEF_STMT (def));
7544 if (!comp)
7545 continue;
7547 if (count > 1)
7549 tree vexpr = make_node (DEBUG_EXPR_DECL);
7550 DECL_ARTIFICIAL (vexpr) = 1;
7551 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7552 if (SSA_NAME_VAR (def))
7553 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7554 else
7555 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7556 gdebug *def_temp
7557 = gimple_build_debug_bind (vexpr, comp, NULL);
7558 gimple_stmt_iterator gsi;
7560 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7561 gsi = gsi_after_labels (gimple_bb
7562 (SSA_NAME_DEF_STMT (def)));
7563 else
7564 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7566 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7567 comp = vexpr;
7570 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7572 if (!gimple_debug_bind_p (stmt))
7573 continue;
7575 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7576 SET_USE (use_p, comp);
7578 update_stmt (stmt);
7584 release_defs_bitset (toremove);
7586 BITMAP_FREE (toremove);
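
/* A hypothetical scoring example for the debug-candidate selection
   above: a surviving candidate whose step matches the dead iv's step
   (+4) and whose base has the same mode (+2) scores 6 and is preferred
   over one whose base is merely an INTEGER_CST (+1), so debug binds are
   rewritten in terms of the closest available iv.  */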
7589 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7590 for hash_map::traverse. */
7592 bool
7593 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7595 free (value);
7596 return true;
7599 /* Frees data allocated by the optimization of a single loop. */
7601 static void
7602 free_loop_data (struct ivopts_data *data)
7604 unsigned i, j;
7605 bitmap_iterator bi;
7606 tree obj;
7608 if (data->niters)
7610 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7611 delete data->niters;
7612 data->niters = NULL;
7615 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7617 struct version_info *info;
7619 info = ver_info (data, i);
7620 info->iv = NULL;
7621 info->has_nonlin_use = false;
7622 info->preserve_biv = false;
7623 info->inv_id = 0;
7625 bitmap_clear (data->relevant);
7626 bitmap_clear (data->important_candidates);
7628 for (i = 0; i < n_iv_uses (data); i++)
7630 struct iv_use *use = iv_use (data, i);
7631 struct iv_use *pre = use, *sub = use->next;
7633 while (sub)
7635 gcc_assert (sub->related_cands == NULL);
7636 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7638 pre = sub;
7639 sub = sub->next;
7640 free (pre);
7643 BITMAP_FREE (use->related_cands);
7644 for (j = 0; j < use->n_map_members; j++)
7645 if (use->cost_map[j].depends_on)
7646 BITMAP_FREE (use->cost_map[j].depends_on);
7647 free (use->cost_map);
7648 free (use);
7650 data->iv_uses.truncate (0);
7652 for (i = 0; i < n_iv_cands (data); i++)
7654 struct iv_cand *cand = iv_cand (data, i);
7656 if (cand->depends_on)
7657 BITMAP_FREE (cand->depends_on);
7658 free (cand);
7660 data->iv_candidates.truncate (0);
7662 if (data->version_info_size < num_ssa_names)
7664 data->version_info_size = 2 * num_ssa_names;
7665 free (data->version_info);
7666 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7669 data->max_inv_id = 0;
7671 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7672 SET_DECL_RTL (obj, NULL_RTX);
7674 decl_rtl_to_reset.truncate (0);
7676 data->inv_expr_tab->empty ();
7677 data->inv_expr_id = 0;
7679 data->iv_common_cand_tab->empty ();
7680 data->iv_common_cands.truncate (0);
7683 /* Finalizes the data structures used by the iv optimization
7684 pass. */
7686 static void
7687 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7689 free_loop_data (data);
7690 free (data->version_info);
7691 BITMAP_FREE (data->relevant);
7692 BITMAP_FREE (data->important_candidates);
7694 decl_rtl_to_reset.release ();
7695 data->iv_uses.release ();
7696 data->iv_candidates.release ();
7697 delete data->inv_expr_tab;
7698 data->inv_expr_tab = NULL;
7699 free_affine_expand_cache (&data->name_expansion_cache);
7700 delete data->iv_common_cand_tab;
7701 data->iv_common_cand_tab = NULL;
7702 data->iv_common_cands.release ();
7703 obstack_free (&data->iv_obstack, NULL);
7706 /* Returns true if the loop body BODY includes any function calls. */
7708 static bool
7709 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7711 gimple_stmt_iterator gsi;
7712 unsigned i;
7714 for (i = 0; i < num_nodes; i++)
7715 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7717 gimple *stmt = gsi_stmt (gsi);
7718 if (is_gimple_call (stmt)
7719 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7720 return true;
7722 return false;
7725 /* Optimizes the LOOP. Returns true if anything changed. */
7727 static bool
7728 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7730 bool changed = false;
7731 struct iv_ca *iv_ca;
7732 edge exit = single_dom_exit (loop);
7733 basic_block *body;
7735 gcc_assert (!data->niters);
7736 data->current_loop = loop;
7737 data->loop_loc = find_loop_location (loop);
7738 data->speed = optimize_loop_for_speed_p (loop);
7740 if (dump_file && (dump_flags & TDF_DETAILS))
7742 fprintf (dump_file, "Processing loop %d", loop->num);
7743 if (data->loop_loc != UNKNOWN_LOCATION)
7744 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7745 LOCATION_LINE (data->loop_loc));
7746 fprintf (dump_file, "\n");
7748 if (exit)
7750 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7751 exit->src->index, exit->dest->index);
7752 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7753 fprintf (dump_file, "\n");
7756 fprintf (dump_file, "\n");
7759 body = get_loop_body (loop);
7760 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7761 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7762 free (body);
7764 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7766 /* For each ssa name, determine whether it behaves as an induction variable
7767 in some loop. */
7768 if (!find_induction_variables (data))
7769 goto finish;
7771 /* Finds interesting uses (item 1). */
7772 find_interesting_uses (data);
7773 group_address_uses (data);
7774 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7775 goto finish;
7777 /* Finds candidates for the induction variables (item 2). */
7778 find_iv_candidates (data);
7780 /* Calculates the costs (item 3, part 1). */
7781 determine_iv_costs (data);
7782 determine_use_iv_costs (data);
7783 determine_set_costs (data);
7785 /* Find the optimal set of induction variables (item 3, part 2). */
7786 iv_ca = find_optimal_iv_set (data);
7787 if (!iv_ca)
7788 goto finish;
7789 changed = true;
7791 /* Create the new induction variables (item 4, part 1). */
7792 create_new_ivs (data, iv_ca);
7793 iv_ca_free (&iv_ca);
7795 /* Rewrite the uses (item 4, part 2). */
7796 rewrite_uses (data);
7798 /* Remove the ivs that are unused after rewriting. */
7799 remove_unused_ivs (data);
7801 /* We have changed the structure of induction variables; it might happen
7802 that definitions in the scev database refer to some of them that were
7803 eliminated. */
7804 scev_reset ();
7806 finish:
7807 free_loop_data (data);
7809 return changed;
7812 /* Main entry point. Optimizes induction variables in loops. */
7814 void
7815 tree_ssa_iv_optimize (void)
7817 struct loop *loop;
7818 struct ivopts_data data;
7820 tree_ssa_iv_optimize_init (&data);
7822 /* Optimize the loops starting with the innermost ones. */
7823 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7825 if (dump_file && (dump_flags & TDF_DETAILS))
7826 flow_loop_dump (loop, dump_file, NULL, 1);
7828 tree_ssa_iv_optimize_loop (&data, loop);
7831 tree_ssa_iv_optimize_finalize (&data);