gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
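
/* As an illustrative sketch (an added example, not part of the pass
   description above): given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address a[i] is an interesting use and i is both an old induction
   variable and a candidate; strength reduction may end up rewriting the
   loop to step a pointer instead:

     for (p = a; p < a + n; p++)
       *p = 0;

   Here a, i, n and p are hypothetical variables used for illustration
   only.  */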
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "cfghooks.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
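
/* For illustration (a hypothetical example, not taken from the sources):
   an integer variable iterating over 0, 4, 8, ... would be described by
   an iv with base 0, step 4, base_object NULL (it is not a pointer) and
   biv_p set if it is incremented directly by the loop; a pointer
   iterating over &a[0], &a[1], ... would instead carry base_object &a.  */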
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
};

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
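
/* A sketch of the difference between the two common positions
   (illustrative only, using a hypothetical counter i): with IP_NORMAL
   the increment is emitted just before the exit condition,

     i' = i + 1;
     if (i' < n)
       goto loop;

   whereas with IP_END the increment is placed at the very end of the
   latch block, after the exit test has already consumed the old value
   of i.  */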
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
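
/* For example (illustrative, with a hypothetical array A): for the
   expression &A[i] the base address &A is returned (converted to
   ptr_type_node); an INTEGER_CST yields NULL_TREE; and a pointer whose
   origin cannot be analyzed further falls through to the default case
   and is returned, converted to ptr_type_node, as its own base.  */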
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
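
/* E.g. (hypothetical operands): for &a[0] the ADDR_EXPR operand is an
   ARRAY_REF rather than a DECL_P node, so the function returns true,
   while for plain &a the operand is the declaration itself and it
   returns false.  This is what makes alloc_iv below lower &a[0] while
   leaving &a alone.  */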
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any ssa var appearing in the
	 step, but since that is expensive and such cases are unusual, we
	 just do it for the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var appearing in the
     step, but since that is expensive and such cases are unusual, we
     just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For an address type use, ADDR_BASE is the stripped IV base and
   ADDR_OFFSET is the const offset stripped from the IV base.  For uses
   of other types, ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
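
/* As an illustration (hypothetical gimple): for a condition i < n where
   i is an induction variable and n is loop invariant, *CONTROL_VAR ends
   up pointing at i, *BOUND at n, and true is returned; for n1 < n2 with
   both operands invariant, the operands are still stored but false is
   returned.  */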
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to the loop recorded in DATA, this function returns the biv from which
   EXPR is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
/* Record that BIV, together with its predecessor and successor ivs, is
   used in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
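
/* Two illustrative cases (hypothetical operands): with TOP = i * 12 and
   BOT = i the MULT_EXPR case stores 12 to *MUL and returns true; with
   TOP = 16 and BOT = 4 the INTEGER_CST case stores 4 and succeeds
   because the remainder is zero, whereas TOP = 17 and BOT = 4 fails.  */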
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has the same stripped iv base and
   step, this function records this one as a sub use to that one;
   otherwise it records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
2207 /* Finds interesting uses of induction variables in the statement STMT. */
2209 static void
2210 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2212 struct iv *iv;
2213 tree op, *lhs, *rhs;
2214 ssa_op_iter iter;
2215 use_operand_p use_p;
2216 enum tree_code code;
2218 find_invariants_stmt (data, stmt);
2220 if (gimple_code (stmt) == GIMPLE_COND)
2222 find_interesting_uses_cond (data, stmt);
2223 return;
2226 if (is_gimple_assign (stmt))
2228 lhs = gimple_assign_lhs_ptr (stmt);
2229 rhs = gimple_assign_rhs1_ptr (stmt);
2231 if (TREE_CODE (*lhs) == SSA_NAME)
2233 /* If the statement defines an induction variable, the uses are not
2234 interesting by themselves. */
2236 iv = get_iv (data, *lhs);
2238 if (iv && !integer_zerop (iv->step))
2239 return;
2242 code = gimple_assign_rhs_code (stmt);
2243 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2244 && (REFERENCE_CLASS_P (*rhs)
2245 || is_gimple_val (*rhs)))
2247 if (REFERENCE_CLASS_P (*rhs))
2248 find_interesting_uses_address (data, stmt, rhs);
2249 else
2250 find_interesting_uses_op (data, *rhs);
2252 if (REFERENCE_CLASS_P (*lhs))
2253 find_interesting_uses_address (data, stmt, lhs);
2254 return;
2256 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2258 find_interesting_uses_cond (data, stmt);
2259 return;
2262 /* TODO -- we should also handle address uses of type
2264 memory = call (whatever);
2268 call (memory). */
2271 if (gimple_code (stmt) == GIMPLE_PHI
2272 && gimple_bb (stmt) == data->current_loop->header)
2274 iv = get_iv (data, PHI_RESULT (stmt));
2276 if (iv && !integer_zerop (iv->step))
2277 return;
2280 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2282 op = USE_FROM_PTR (use_p);
2284 if (TREE_CODE (op) != SSA_NAME)
2285 continue;
2287 iv = get_iv (data, op);
2288 if (!iv)
2289 continue;
2291 find_interesting_uses_op (data, op);
2295 /* Finds interesting uses of induction variables outside of loops
2296 on loop exit edge EXIT. */
2298 static void
2299 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2301 gphi *phi;
2302 gphi_iterator psi;
2303 tree def;
2305 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2307 phi = psi.phi ();
2308 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2309 if (!virtual_operand_p (def))
2310 find_interesting_uses_op (data, def);
2314 /* Finds uses of the induction variables that are interesting. */
2316 static void
2317 find_interesting_uses (struct ivopts_data *data)
2319 basic_block bb;
2320 gimple_stmt_iterator bsi;
2321 basic_block *body = get_loop_body (data->current_loop);
2322 unsigned i;
2323 struct version_info *info;
2324 edge e;
2326 if (dump_file && (dump_flags & TDF_DETAILS))
2327 fprintf (dump_file, "Uses:\n\n");
2329 for (i = 0; i < data->current_loop->num_nodes; i++)
2331 edge_iterator ei;
2332 bb = body[i];
2334 FOR_EACH_EDGE (e, ei, bb->succs)
2335 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2336 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2337 find_interesting_uses_outside (data, e);
2339 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2340 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2341 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2342 if (!is_gimple_debug (gsi_stmt (bsi)))
2343 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2346 if (dump_file && (dump_flags & TDF_DETAILS))
2348 bitmap_iterator bi;
2350 fprintf (dump_file, "\n");
2352 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2354 info = ver_info (data, i);
2355 if (info->inv_id)
2357 fprintf (dump_file, " ");
2358 print_generic_expr (dump_file, info->name, TDF_SLIM);
2359 fprintf (dump_file, " is invariant (%d)%s\n",
2360 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2364 fprintf (dump_file, "\n");
2367 free (body);
2370 /* Compute the maximum offset of the [base + offset] addressing mode
2371 for the memory reference represented by USE. */
2373 static HOST_WIDE_INT
2374 compute_max_addr_offset (struct iv_use *use)
2376 int width;
2377 rtx reg, addr;
2378 HOST_WIDE_INT i, off;
2379 unsigned list_index, num;
2380 addr_space_t as;
2381 machine_mode mem_mode, addr_mode;
2382 static vec<HOST_WIDE_INT> max_offset_list;
2384 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2385 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2387 num = max_offset_list.length ();
2388 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2389 if (list_index >= num)
2391 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2392 for (; num < max_offset_list.length (); num++)
2393 max_offset_list[num] = -1;
2396 off = max_offset_list[list_index];
2397 if (off != -1)
2398 return off;
2400 addr_mode = targetm.addr_space.address_mode (as);
2401 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2402 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2404 width = GET_MODE_BITSIZE (addr_mode) - 1;
2405 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2406 width = HOST_BITS_PER_WIDE_INT - 1;
2408 for (i = width; i > 0; i--)
2410 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2411 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2412 if (memory_address_addr_space_p (mem_mode, addr, as))
2413 break;
2415 /* For some strict-alignment targets, the offset must be naturally
2416 aligned. Try an aligned offset if mem_mode is not QImode. */
2417 off = ((unsigned HOST_WIDE_INT) 1 << i);
2418 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2420 off -= GET_MODE_SIZE (mem_mode);
2421 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2422 if (memory_address_addr_space_p (mem_mode, addr, as))
2423 break;
2426 if (i == 0)
2427 off = 0;
2429 max_offset_list[list_index] = off;
2430 return off;
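/* A minimal standalone sketch (not GCC internals) of the probing scheme
   above: walk offsets of the form (1 << i) - 1 from the widest candidate
   downwards and keep the first one the target accepts.  valid_offset_p is
   a hypothetical stand-in for memory_address_addr_space_p, and the 12-bit
   limit is only an example.  */

#include <stdbool.h>

static bool
valid_offset_p (long off)
{
  /* Hypothetical target: offsets must fit in a signed 12-bit field.  */
  return off >= -2048 && off <= 2047;
}

static long
max_addr_offset_sketch (int width)
{
  for (int i = width; i > 0; i--)
    {
      long off = (1L << i) - 1;
      if (valid_offset_p (off))
        return off;               /* 2047 for the predicate above */
    }
  return 0;
}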
2433 /* Check if all small groups should be split. Return true if and
2434 only if:
2436 1) At least one group contains two uses with different offsets.
2437 2) No group contains more than two uses with different offsets.
2439 Return false otherwise. We want to split such groups because:
2441 1) Small groups don't have much benefit and may interfere with
2442 general candidate selection.
2443 2) The problem size with only small groups is usually small, and the
2444 general algorithm can handle it well.
2446 TODO -- The above claim may not hold when auto-increment is supported. */
2448 static bool
2449 split_all_small_groups (struct ivopts_data *data)
2451 bool split_p = false;
2452 unsigned int i, n, distinct;
2453 struct iv_use *pre, *use;
2455 n = n_iv_uses (data);
2456 for (i = 0; i < n; i++)
2458 use = iv_use (data, i);
2459 if (!use->next)
2460 continue;
2462 distinct = 1;
2463 gcc_assert (use->type == USE_ADDRESS);
2464 for (pre = use, use = use->next; use; pre = use, use = use->next)
2466 if (pre->addr_offset != use->addr_offset)
2467 distinct++;
2469 if (distinct > 2)
2470 return false;
2472 if (distinct == 2)
2473 split_p = true;
2476 return split_p;
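/* A standalone sketch (not GCC internals) of the distinct-offset count
   performed above.  It counts offset transitions between neighbors, which
   equals the number of distinct offsets because the sub-use lists are
   maintained sorted by offset.  The struct name is made up for the
   example.  */

struct sub_sketch { unsigned long off; struct sub_sketch *next; };

static unsigned
distinct_offsets (struct sub_sketch *head)
{
  unsigned n = 1;
  for (struct sub_sketch *p = head; p && p->next; p = p->next)
    if (p->off != p->next->off)
      n++;
  return n;   /* the group above is "small" when this is exactly 2 */
}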
2479 /* For each group of address type uses, this function further groups
2480 these uses according to the maximum offset supported by the target's
2481 [base + offset] addressing mode. */
2483 static void
2484 group_address_uses (struct ivopts_data *data)
2486 HOST_WIDE_INT max_offset = -1;
2487 unsigned int i, n, sub_id;
2488 struct iv_use *pre, *use;
2489 unsigned HOST_WIDE_INT addr_offset_first;
2491 /* Reset max offset to split all small groups. */
2492 if (split_all_small_groups (data))
2493 max_offset = 0;
2495 n = n_iv_uses (data);
2496 for (i = 0; i < n; i++)
2498 use = iv_use (data, i);
2499 if (!use->next)
2500 continue;
2502 gcc_assert (use->type == USE_ADDRESS);
2503 if (max_offset != 0)
2504 max_offset = compute_max_addr_offset (use);
2506 while (use)
2508 sub_id = 0;
2509 addr_offset_first = use->addr_offset;
2510 /* Only uses whose offset from the first use fits in the offset part
2511 of the addressing mode can be grouped together. */
2512 for (pre = use, use = use->next;
2513 use && (use->addr_offset - addr_offset_first
2514 <= (unsigned HOST_WIDE_INT) max_offset);
2515 pre = use, use = use->next)
2517 use->id = pre->id;
2518 use->sub_id = ++sub_id;
2521 /* Break the list and create a new group. */
2522 if (use)
2524 pre->next = NULL;
2525 use->id = n_iv_uses (data);
2526 use->related_cands = BITMAP_ALLOC (NULL);
2527 data->iv_uses.safe_push (use);
2532 if (dump_file && (dump_flags & TDF_DETAILS))
2533 dump_uses (dump_file, data);
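/* A standalone sketch (not GCC internals) of the regrouping done above.
   Given offsets sorted in ascending order, a new group starts whenever
   the distance from the group's first offset exceeds MAX_OFF, mirroring
   the inner loop of group_address_uses.  */

static unsigned
count_offset_groups (const unsigned long *off, unsigned n,
                     unsigned long max_off)
{
  unsigned groups = 0, i = 0;
  while (i < n)
    {
      unsigned long first = off[i];
      groups++;
      /* Everything within [first, first + max_off] shares the group.  */
      while (i < n && off[i] - first <= max_off)
        i++;
    }
  return groups;
}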
2536 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2537 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2538 we are at the top-level of the processed address. */
2540 static tree
2541 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2542 HOST_WIDE_INT *offset)
2544 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2545 enum tree_code code;
2546 tree type, orig_type = TREE_TYPE (expr);
2547 HOST_WIDE_INT off0, off1, st;
2548 tree orig_expr = expr;
2550 STRIP_NOPS (expr);
2552 type = TREE_TYPE (expr);
2553 code = TREE_CODE (expr);
2554 *offset = 0;
2556 switch (code)
2558 case INTEGER_CST:
2559 if (!cst_and_fits_in_hwi (expr)
2560 || integer_zerop (expr))
2561 return orig_expr;
2563 *offset = int_cst_value (expr);
2564 return build_int_cst (orig_type, 0);
2566 case POINTER_PLUS_EXPR:
2567 case PLUS_EXPR:
2568 case MINUS_EXPR:
2569 op0 = TREE_OPERAND (expr, 0);
2570 op1 = TREE_OPERAND (expr, 1);
2572 op0 = strip_offset_1 (op0, false, false, &off0);
2573 op1 = strip_offset_1 (op1, false, false, &off1);
2575 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2576 if (op0 == TREE_OPERAND (expr, 0)
2577 && op1 == TREE_OPERAND (expr, 1))
2578 return orig_expr;
2580 if (integer_zerop (op1))
2581 expr = op0;
2582 else if (integer_zerop (op0))
2584 if (code == MINUS_EXPR)
2585 expr = fold_build1 (NEGATE_EXPR, type, op1);
2586 else
2587 expr = op1;
2589 else
2590 expr = fold_build2 (code, type, op0, op1);
2592 return fold_convert (orig_type, expr);
2594 case MULT_EXPR:
2595 op1 = TREE_OPERAND (expr, 1);
2596 if (!cst_and_fits_in_hwi (op1))
2597 return orig_expr;
2599 op0 = TREE_OPERAND (expr, 0);
2600 op0 = strip_offset_1 (op0, false, false, &off0);
2601 if (op0 == TREE_OPERAND (expr, 0))
2602 return orig_expr;
2604 *offset = off0 * int_cst_value (op1);
2605 if (integer_zerop (op0))
2606 expr = op0;
2607 else
2608 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2610 return fold_convert (orig_type, expr);
2612 case ARRAY_REF:
2613 case ARRAY_RANGE_REF:
2614 if (!inside_addr)
2615 return orig_expr;
2617 step = array_ref_element_size (expr);
2618 if (!cst_and_fits_in_hwi (step))
2619 break;
2621 st = int_cst_value (step);
2622 op1 = TREE_OPERAND (expr, 1);
2623 op1 = strip_offset_1 (op1, false, false, &off1);
2624 *offset = off1 * st;
2626 if (top_compref
2627 && integer_zerop (op1))
2629 /* Strip the component reference completely. */
2630 op0 = TREE_OPERAND (expr, 0);
2631 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2632 *offset += off0;
2633 return op0;
2635 break;
2637 case COMPONENT_REF:
2639 tree field;
2641 if (!inside_addr)
2642 return orig_expr;
2644 tmp = component_ref_field_offset (expr);
2645 field = TREE_OPERAND (expr, 1);
2646 if (top_compref
2647 && cst_and_fits_in_hwi (tmp)
2648 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2650 HOST_WIDE_INT boffset, abs_off;
2652 /* Strip the component reference completely. */
2653 op0 = TREE_OPERAND (expr, 0);
2654 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2655 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2656 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2657 if (boffset < 0)
2658 abs_off = -abs_off;
2660 *offset = off0 + int_cst_value (tmp) + abs_off;
2661 return op0;
2664 break;
2666 case ADDR_EXPR:
2667 op0 = TREE_OPERAND (expr, 0);
2668 op0 = strip_offset_1 (op0, true, true, &off0);
2669 *offset += off0;
2671 if (op0 == TREE_OPERAND (expr, 0))
2672 return orig_expr;
2674 expr = build_fold_addr_expr (op0);
2675 return fold_convert (orig_type, expr);
2677 case MEM_REF:
2678 /* ??? Offset operand? */
2679 inside_addr = false;
2680 break;
2682 default:
2683 return orig_expr;
2686 /* Default handling of expressions for which we want to recurse into
2687 the first operand. */
2688 op0 = TREE_OPERAND (expr, 0);
2689 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2690 *offset += off0;
2692 if (op0 == TREE_OPERAND (expr, 0)
2693 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2694 return orig_expr;
2696 expr = copy_node (expr);
2697 TREE_OPERAND (expr, 0) = op0;
2698 if (op1)
2699 TREE_OPERAND (expr, 1) = op1;
2701 /* Inside an address, we might strip the top-level component references,
2702 thus changing the type of the expression. The handling of ADDR_EXPR
2703 will fix that. */
2704 expr = fold_convert (orig_type, expr);
2706 return expr;
2709 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2711 static tree
2712 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2714 HOST_WIDE_INT off;
2715 tree core = strip_offset_1 (expr, false, false, &off);
2716 *offset = off;
2717 return core;
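/* A standalone sketch (not GCC internals) of what the pair of functions
   above computes, on a concrete example.  For an address such as
   &a[i + 3] with 4-byte elements, the constant displacement is separated
   from the variable core, so two accesses differing only in the constant
   index share one stripped base.  The struct and names below are made up
   for illustration.  */

struct strip_sketch { long var_index; unsigned long offset; };

static struct strip_sketch
strip_offset_sketch (long var_index, long const_index,
                     unsigned long elt_size)
{
  struct strip_sketch s;
  s.var_index = var_index;             /* core: &a[i], kept symbolic */
  s.offset = const_index * elt_size;   /* constant part: 3 * 4 == 12 */
  return s;
}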
2720 /* Returns a variant of TYPE that can be used as the base for different
2721 uses. We return an unsigned type with the same precision, which avoids
2722 problems with overflows. */
2724 static tree
2725 generic_type_for (tree type)
2727 if (POINTER_TYPE_P (type))
2728 return unsigned_type_for (type);
2730 if (TYPE_UNSIGNED (type))
2731 return type;
2733 return unsigned_type_for (type);
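/* A standalone illustration (not GCC internals) of why an unsigned type
   is chosen here: signed overflow is undefined behavior in C, so a
   candidate computed in a signed type could let later passes assume it
   never wraps, while unsigned arithmetic wraps modulo 2^N by
   definition.  */

static unsigned int
wrapping_increment (unsigned int iv)
{
  /* Well defined: yields 0 when iv == UINT_MAX.  The same increment on
     a signed int holding INT_MAX would be undefined behavior.  */
  return iv + 1;
}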
2736 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2737 the bitmap in which we should store them. */
2739 static struct ivopts_data *fd_ivopts_data;
2740 static tree
2741 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2743 bitmap *depends_on = (bitmap *) data;
2744 struct version_info *info;
2746 if (TREE_CODE (*expr_p) != SSA_NAME)
2747 return NULL_TREE;
2748 info = name_info (fd_ivopts_data, *expr_p);
2750 if (!info->inv_id || info->has_nonlin_use)
2751 return NULL_TREE;
2753 if (!*depends_on)
2754 *depends_on = BITMAP_ALLOC (NULL);
2755 bitmap_set_bit (*depends_on, info->inv_id);
2757 return NULL_TREE;
2760 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2761 position to POS. If USE is not NULL, the candidate is set as related to
2762 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2763 replacement of the final value of the iv by a direct computation. */
2765 static struct iv_cand *
2766 add_candidate_1 (struct ivopts_data *data,
2767 tree base, tree step, bool important, enum iv_position pos,
2768 struct iv_use *use, gimple *incremented_at,
2769 struct iv *orig_iv = NULL)
2771 unsigned i;
2772 struct iv_cand *cand = NULL;
2773 tree type, orig_type;
2775 /* For non-original variables, make sure their values are computed in a type
2776 that does not invoke undefined behavior on overflows (since in general,
2777 we cannot prove that these induction variables are non-wrapping). */
2778 if (pos != IP_ORIGINAL)
2780 orig_type = TREE_TYPE (base);
2781 type = generic_type_for (orig_type);
2782 if (type != orig_type)
2784 base = fold_convert (type, base);
2785 step = fold_convert (type, step);
2789 for (i = 0; i < n_iv_cands (data); i++)
2791 cand = iv_cand (data, i);
2793 if (cand->pos != pos)
2794 continue;
2796 if (cand->incremented_at != incremented_at
2797 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2798 && cand->ainc_use != use))
2799 continue;
2801 if (!cand->iv)
2803 if (!base && !step)
2804 break;
2806 continue;
2809 if (!base && !step)
2810 continue;
2812 if (operand_equal_p (base, cand->iv->base, 0)
2813 && operand_equal_p (step, cand->iv->step, 0)
2814 && (TYPE_PRECISION (TREE_TYPE (base))
2815 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2816 break;
2819 if (i == n_iv_cands (data))
2821 cand = XCNEW (struct iv_cand);
2822 cand->id = i;
2824 if (!base && !step)
2825 cand->iv = NULL;
2826 else
2827 cand->iv = alloc_iv (data, base, step);
2829 cand->pos = pos;
2830 if (pos != IP_ORIGINAL && cand->iv)
2832 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2833 cand->var_after = cand->var_before;
2835 cand->important = important;
2836 cand->incremented_at = incremented_at;
2837 data->iv_candidates.safe_push (cand);
2839 if (step
2840 && TREE_CODE (step) != INTEGER_CST)
2842 fd_ivopts_data = data;
2843 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2846 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2847 cand->ainc_use = use;
2848 else
2849 cand->ainc_use = NULL;
2851 cand->orig_iv = orig_iv;
2852 if (dump_file && (dump_flags & TDF_DETAILS))
2853 dump_cand (dump_file, cand);
2856 if (important && !cand->important)
2858 cand->important = true;
2859 if (dump_file && (dump_flags & TDF_DETAILS))
2860 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2863 if (use)
2865 bitmap_set_bit (use->related_cands, i);
2866 if (dump_file && (dump_flags & TDF_DETAILS))
2867 fprintf (dump_file, "Candidate %d is related to use %d\n",
2868 cand->id, use->id);
2871 return cand;
2874 /* Returns true if incrementing the induction variable at the end of the LOOP
2875 is allowed.
2877 The purpose is to avoid splitting the latch edge with a biv increment,
2878 thus creating a jump, possibly confusing other optimization passes and
2879 leaving less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2880 is not available (so we do not have a better alternative), or if the latch
2881 edge is already nonempty. */
2883 static bool
2884 allow_ip_end_pos_p (struct loop *loop)
2886 if (!ip_normal_pos (loop))
2887 return true;
2889 if (!empty_block_p (ip_end_pos (loop)))
2890 return true;
2892 return false;
2895 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2896 Important field is set to IMPORTANT. */
2898 static void
2899 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2900 bool important, struct iv_use *use)
2902 basic_block use_bb = gimple_bb (use->stmt);
2903 machine_mode mem_mode;
2904 unsigned HOST_WIDE_INT cstepi;
2906 /* If we insert the increment in any position other than the standard
2907 ones, we must ensure that it is incremented once per iteration.
2908 It must not be in an inner nested loop, or one side of an if
2909 statement. */
2910 if (use_bb->loop_father != data->current_loop
2911 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2912 || stmt_could_throw_p (use->stmt)
2913 || !cst_and_fits_in_hwi (step))
2914 return;
2916 cstepi = int_cst_value (step);
2918 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2919 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2920 || USE_STORE_PRE_INCREMENT (mem_mode))
2921 && GET_MODE_SIZE (mem_mode) == cstepi)
2922 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2923 || USE_STORE_PRE_DECREMENT (mem_mode))
2924 && GET_MODE_SIZE (mem_mode) == -cstepi))
2926 enum tree_code code = MINUS_EXPR;
2927 tree new_base;
2928 tree new_step = step;
2930 if (POINTER_TYPE_P (TREE_TYPE (base)))
2932 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2933 code = POINTER_PLUS_EXPR;
2935 else
2936 new_step = fold_convert (TREE_TYPE (base), new_step);
2937 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2938 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2939 use->stmt);
2941 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2942 || USE_STORE_POST_INCREMENT (mem_mode))
2943 && GET_MODE_SIZE (mem_mode) == cstepi)
2944 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2945 || USE_STORE_POST_DECREMENT (mem_mode))
2946 && GET_MODE_SIZE (mem_mode) == -cstepi))
2948 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2949 use->stmt);
2953 /* Adds a candidate BASE + STEP * i. The important field is set to
2954 IMPORTANT. If USE is not NULL, the candidate is set as related to
2955 it. The candidate computation is scheduled before the exit condition
2956 and at the end of the loop. */
2958 static void
2959 add_candidate (struct ivopts_data *data,
2960 tree base, tree step, bool important, struct iv_use *use,
2961 struct iv *orig_iv = NULL)
2963 gcc_assert (use == NULL || use->sub_id == 0);
2965 if (ip_normal_pos (data->current_loop))
2966 add_candidate_1 (data, base, step, important,
2967 IP_NORMAL, use, NULL, orig_iv);
2968 if (ip_end_pos (data->current_loop)
2969 && allow_ip_end_pos_p (data->current_loop))
2970 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2973 /* Adds standard iv candidates. */
2975 static void
2976 add_standard_iv_candidates (struct ivopts_data *data)
2978 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2980 /* The same for a double-integer type if it is still fast enough. */
2981 if (TYPE_PRECISION
2982 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2983 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2984 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2985 build_int_cst (long_integer_type_node, 1), true, NULL);
2987 /* The same for a double-integer type if it is still fast enough. */
2988 if (TYPE_PRECISION
2989 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2990 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2991 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2992 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2996 /* Adds candidates based on the old induction variable IV. */
2998 static void
2999 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3001 gimple *phi;
3002 tree def;
3003 struct iv_cand *cand;
3005 /* Check if this biv is used in an address type use. */
3006 if (iv->no_overflow && iv->have_address_use
3007 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3008 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3010 tree base = fold_convert (sizetype, iv->base);
3011 tree step = fold_convert (sizetype, iv->step);
3013 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3014 add_candidate (data, base, step, true, NULL, iv);
3015 /* Add iv cand of the original type only if it has nonlinear use. */
3016 if (iv->have_use_for)
3017 add_candidate (data, iv->base, iv->step, true, NULL);
3019 else
3020 add_candidate (data, iv->base, iv->step, true, NULL);
3022 /* The same, but with initial value zero. */
3023 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3024 add_candidate (data, size_int (0), iv->step, true, NULL);
3025 else
3026 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3027 iv->step, true, NULL);
3029 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3030 if (gimple_code (phi) == GIMPLE_PHI)
3032 /* Additionally record the possibility of leaving the original iv
3033 untouched. */
3034 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3035 /* Don't add candidate if it's from another PHI node because
3036 it's an affine iv appearing in the form of PEELED_CHREC. */
3037 phi = SSA_NAME_DEF_STMT (def);
3038 if (gimple_code (phi) != GIMPLE_PHI)
3040 cand = add_candidate_1 (data,
3041 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3042 SSA_NAME_DEF_STMT (def));
3043 cand->var_before = iv->ssa_name;
3044 cand->var_after = def;
3046 else
3047 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3051 /* Adds candidates based on the old induction variables. */
3053 static void
3054 add_iv_candidate_for_bivs (struct ivopts_data *data)
3056 unsigned i;
3057 struct iv *iv;
3058 bitmap_iterator bi;
3060 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3062 iv = ver_info (data, i)->iv;
3063 if (iv && iv->biv_p && !integer_zerop (iv->step))
3064 add_iv_candidate_for_biv (data, iv);
3068 /* Adds candidates based on the value of USE's iv. */
3070 static void
3071 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3073 unsigned HOST_WIDE_INT offset;
3074 tree base;
3075 tree basetype;
3076 struct iv *iv = use->iv;
3078 add_candidate (data, iv->base, iv->step, false, use);
3080 /* The same, but with initial value zero. Make such a variable important,
3081 since it is generic enough that possibly many uses may be based
3082 on it. */
3083 basetype = TREE_TYPE (iv->base);
3084 if (POINTER_TYPE_P (basetype))
3085 basetype = sizetype;
3086 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3088 /* Third, try removing the constant offset. Make sure to even
3089 add a candidate for &a[0] vs. (T *)&a. */
3090 base = strip_offset (iv->base, &offset);
3091 if (offset || base != iv->base)
3092 add_candidate (data, base, iv->step, false, use);
3094 /* Finally, add auto-increment candidates. Make such variables
3095 important, since other iv uses with the same base object may be based
3096 on them. */
3097 if (use != NULL && use->type == USE_ADDRESS)
3098 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3101 /* Adds candidates based on the uses. */
3103 static void
3104 add_iv_candidate_for_uses (struct ivopts_data *data)
3106 unsigned i;
3108 for (i = 0; i < n_iv_uses (data); i++)
3110 struct iv_use *use = iv_use (data, i);
3112 if (!use)
3113 continue;
3115 switch (use->type)
3117 case USE_NONLINEAR_EXPR:
3118 case USE_COMPARE:
3119 case USE_ADDRESS:
3120 /* Just add the ivs based on the value of the iv used here. */
3121 add_iv_candidate_for_use (data, use);
3122 break;
3124 default:
3125 gcc_unreachable ();
3130 /* Record important candidates and add them to related_cands bitmaps
3131 if needed. */
3133 static void
3134 record_important_candidates (struct ivopts_data *data)
3136 unsigned i;
3137 struct iv_use *use;
3139 for (i = 0; i < n_iv_cands (data); i++)
3141 struct iv_cand *cand = iv_cand (data, i);
3143 if (cand->important)
3144 bitmap_set_bit (data->important_candidates, i);
3147 data->consider_all_candidates = (n_iv_cands (data)
3148 <= CONSIDER_ALL_CANDIDATES_BOUND);
3150 if (data->consider_all_candidates)
3152 /* We will not need "related_cands" bitmaps in this case,
3153 so release them to decrease peak memory consumption. */
3154 for (i = 0; i < n_iv_uses (data); i++)
3156 use = iv_use (data, i);
3157 BITMAP_FREE (use->related_cands);
3160 else
3162 /* Add important candidates to the related_cands bitmaps. */
3163 for (i = 0; i < n_iv_uses (data); i++)
3164 bitmap_ior_into (iv_use (data, i)->related_cands,
3165 data->important_candidates);
3169 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3170 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3171 we allocate a small linearly-probed hash table for every use. */
3173 static void
3174 alloc_use_cost_map (struct ivopts_data *data)
3176 unsigned i, size, s;
3178 for (i = 0; i < n_iv_uses (data); i++)
3180 struct iv_use *use = iv_use (data, i);
3182 if (data->consider_all_candidates)
3183 size = n_iv_cands (data);
3184 else
3186 s = bitmap_count_bits (use->related_cands);
3188 /* Round up to a power of two, so that computing the modulo is fast. */
3189 size = s ? (1 << ceil_log2 (s)) : 1;
3192 use->n_map_members = size;
3193 use->cost_map = XCNEWVEC (struct cost_pair, size);
3197 /* Returns description of computation cost of expression whose runtime
3198 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3200 static comp_cost
3201 new_cost (unsigned runtime, unsigned complexity)
3203 comp_cost cost;
3205 cost.cost = runtime;
3206 cost.complexity = complexity;
3208 return cost;
3211 /* Returns true if COST is infinite. */
3213 static bool
3214 infinite_cost_p (comp_cost cost)
3216 return cost.cost == INFTY;
3219 /* Adds costs COST1 and COST2. */
3221 static comp_cost
3222 add_costs (comp_cost cost1, comp_cost cost2)
3224 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3225 return infinite_cost;
3227 cost1.cost += cost2.cost;
3228 cost1.complexity += cost2.complexity;
3230 return cost1;
3232 /* Subtracts cost COST2 from COST1. */
3234 static comp_cost
3235 sub_costs (comp_cost cost1, comp_cost cost2)
3237 cost1.cost -= cost2.cost;
3238 cost1.complexity -= cost2.complexity;
3240 return cost1;
3243 /* Returns a negative number if COST1 < COST2, a positive number if
3244 COST1 > COST2, and 0 if COST1 = COST2. */
3246 static int
3247 compare_costs (comp_cost cost1, comp_cost cost2)
3249 if (cost1.cost == cost2.cost)
3250 return cost1.complexity - cost2.complexity;
3252 return cost1.cost - cost2.cost;
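/* A standalone restatement (not GCC internals) of the ordering above:
   it is an ordinary lexicographic comparison on the (cost, complexity)
   pair.  Runtime cost decides and complexity only breaks ties, so
   (4, 2) < (5, 0) but (4, 1) < (4, 2).  */

struct cost_pair_sketch { int cost; int complexity; };

static int
compare_costs_sketch (struct cost_pair_sketch a, struct cost_pair_sketch b)
{
  if (a.cost == b.cost)
    return a.complexity - b.complexity;   /* tiebreak on complexity */
  return a.cost - b.cost;
}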
3255 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3256 depends on the invariants DEPENDS_ON, that the value used in expressing it
3257 is VALUE, and, in the case of iv elimination, that the comparison operator is COMP. */
3259 static void
3260 set_use_iv_cost (struct ivopts_data *data,
3261 struct iv_use *use, struct iv_cand *cand,
3262 comp_cost cost, bitmap depends_on, tree value,
3263 enum tree_code comp, int inv_expr_id)
3265 unsigned i, s;
3267 if (infinite_cost_p (cost))
3269 BITMAP_FREE (depends_on);
3270 return;
3273 if (data->consider_all_candidates)
3275 use->cost_map[cand->id].cand = cand;
3276 use->cost_map[cand->id].cost = cost;
3277 use->cost_map[cand->id].depends_on = depends_on;
3278 use->cost_map[cand->id].value = value;
3279 use->cost_map[cand->id].comp = comp;
3280 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3281 return;
3284 /* n_map_members is a power of two, so this computes modulo. */
3285 s = cand->id & (use->n_map_members - 1);
3286 for (i = s; i < use->n_map_members; i++)
3287 if (!use->cost_map[i].cand)
3288 goto found;
3289 for (i = 0; i < s; i++)
3290 if (!use->cost_map[i].cand)
3291 goto found;
3293 gcc_unreachable ();
3295 found:
3296 use->cost_map[i].cand = cand;
3297 use->cost_map[i].cost = cost;
3298 use->cost_map[i].depends_on = depends_on;
3299 use->cost_map[i].value = value;
3300 use->cost_map[i].comp = comp;
3301 use->cost_map[i].inv_expr_id = inv_expr_id;
3304 /* Gets cost of (USE, CANDIDATE) pair. */
3306 static struct cost_pair *
3307 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3308 struct iv_cand *cand)
3310 unsigned i, s;
3311 struct cost_pair *ret;
3313 if (!cand)
3314 return NULL;
3316 if (data->consider_all_candidates)
3318 ret = use->cost_map + cand->id;
3319 if (!ret->cand)
3320 return NULL;
3322 return ret;
3325 /* n_map_members is a power of two, so this computes modulo. */
3326 s = cand->id & (use->n_map_members - 1);
3327 for (i = s; i < use->n_map_members; i++)
3328 if (use->cost_map[i].cand == cand)
3329 return use->cost_map + i;
3330 else if (use->cost_map[i].cand == NULL)
3331 return NULL;
3332 for (i = 0; i < s; i++)
3333 if (use->cost_map[i].cand == cand)
3334 return use->cost_map + i;
3335 else if (use->cost_map[i].cand == NULL)
3336 return NULL;
3338 return NULL;
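/* A standalone sketch (not GCC internals) of the open-addressing scheme
   shared by set_use_iv_cost and get_use_iv_cost above.  SIZE is a power
   of two, so ID & (SIZE - 1) is the cheap modulo; probing runs linearly
   to the end of the table and then wraps once.  Slot value 0 stands in
   for "empty" here.  */

static int
find_slot_sketch (const int *table, unsigned size /* power of two */,
                  int id)
{
  unsigned start = (unsigned) id & (size - 1);   /* id % size */
  for (unsigned i = start; i < size; i++)
    if (table[i] == id || table[i] == 0)
      return (int) i;
  for (unsigned i = 0; i < start; i++)
    if (table[i] == id || table[i] == 0)
      return (int) i;
  return -1;   /* full; the insertion path above asserts instead */
}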
3341 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3342 static rtx
3343 produce_memory_decl_rtl (tree obj, int *regno)
3345 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3346 machine_mode address_mode = targetm.addr_space.address_mode (as);
3347 rtx x;
3349 gcc_assert (obj);
3350 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3352 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3353 x = gen_rtx_SYMBOL_REF (address_mode, name);
3354 SET_SYMBOL_REF_DECL (x, obj);
3355 x = gen_rtx_MEM (DECL_MODE (obj), x);
3356 set_mem_addr_space (x, as);
3357 targetm.encode_section_info (obj, x, true);
3359 else
3361 x = gen_raw_REG (address_mode, (*regno)++);
3362 x = gen_rtx_MEM (DECL_MODE (obj), x);
3363 set_mem_addr_space (x, as);
3366 return x;
3369 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3370 walk_tree. DATA contains the actual fake register number. */
3372 static tree
3373 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3375 tree obj = NULL_TREE;
3376 rtx x = NULL_RTX;
3377 int *regno = (int *) data;
3379 switch (TREE_CODE (*expr_p))
3381 case ADDR_EXPR:
3382 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3383 handled_component_p (*expr_p);
3384 expr_p = &TREE_OPERAND (*expr_p, 0))
3385 continue;
3386 obj = *expr_p;
3387 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3388 x = produce_memory_decl_rtl (obj, regno);
3389 break;
3391 case SSA_NAME:
3392 *ws = 0;
3393 obj = SSA_NAME_VAR (*expr_p);
3394 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3395 if (!obj)
3396 return NULL_TREE;
3397 if (!DECL_RTL_SET_P (obj))
3398 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3399 break;
3401 case VAR_DECL:
3402 case PARM_DECL:
3403 case RESULT_DECL:
3404 *ws = 0;
3405 obj = *expr_p;
3407 if (DECL_RTL_SET_P (obj))
3408 break;
3410 if (DECL_MODE (obj) == BLKmode)
3411 x = produce_memory_decl_rtl (obj, regno);
3412 else
3413 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3415 break;
3417 default:
3418 break;
3421 if (x)
3423 decl_rtl_to_reset.safe_push (obj);
3424 SET_DECL_RTL (obj, x);
3427 return NULL_TREE;
3430 /* Determines cost of the computation of EXPR. */
3432 static unsigned
3433 computation_cost (tree expr, bool speed)
3435 rtx_insn *seq;
3436 rtx rslt;
3437 tree type = TREE_TYPE (expr);
3438 unsigned cost;
3439 /* Avoid using hard regs in ways which may be unsupported. */
3440 int regno = LAST_VIRTUAL_REGISTER + 1;
3441 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3442 enum node_frequency real_frequency = node->frequency;
3444 node->frequency = NODE_FREQUENCY_NORMAL;
3445 crtl->maybe_hot_insn_p = speed;
3446 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3447 start_sequence ();
3448 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3449 seq = get_insns ();
3450 end_sequence ();
3451 default_rtl_profile ();
3452 node->frequency = real_frequency;
3454 cost = seq_cost (seq, speed);
3455 if (MEM_P (rslt))
3456 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3457 TYPE_ADDR_SPACE (type), speed);
3458 else if (!REG_P (rslt))
3459 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3461 return cost;
3464 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3466 static tree
3467 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3469 if (stmt_after_increment (loop, cand, stmt))
3470 return cand->var_after;
3471 else
3472 return cand->var_before;
3475 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have
3476 the same precision, which is at least as wide as the precision of TYPE,
3477 this stores BA to A and BB to B, and returns the type of BA. Otherwise,
3478 it returns the type of A and B. */
3480 static tree
3481 determine_common_wider_type (tree *a, tree *b)
3483 tree wider_type = NULL;
3484 tree suba, subb;
3485 tree atype = TREE_TYPE (*a);
3487 if (CONVERT_EXPR_P (*a))
3489 suba = TREE_OPERAND (*a, 0);
3490 wider_type = TREE_TYPE (suba);
3491 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3492 return atype;
3494 else
3495 return atype;
3497 if (CONVERT_EXPR_P (*b))
3499 subb = TREE_OPERAND (*b, 0);
3500 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3501 return atype;
3503 else
3504 return atype;
3506 *a = suba;
3507 *b = subb;
3508 return wider_type;
3511 /* Determines the expression by which USE is expressed from induction variable
3512 CAND at statement AT in LOOP. The expression is stored in a decomposed
3513 form into AFF. Returns false if USE cannot be expressed using CAND. */
3515 static bool
3516 get_computation_aff (struct loop *loop,
3517 struct iv_use *use, struct iv_cand *cand, gimple *at,
3518 struct aff_tree *aff)
3520 tree ubase = use->iv->base;
3521 tree ustep = use->iv->step;
3522 tree cbase = cand->iv->base;
3523 tree cstep = cand->iv->step, cstep_common;
3524 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3525 tree common_type, var;
3526 tree uutype;
3527 aff_tree cbase_aff, var_aff;
3528 widest_int rat;
3530 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3532 /* We do not have enough precision to express the values of the use. */
3533 return false;
3536 var = var_at_stmt (loop, cand, at);
3537 uutype = unsigned_type_for (utype);
3539 /* If the conversion is not noop, perform it. */
3540 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3542 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3543 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3545 tree inner_base, inner_step, inner_type;
3546 inner_base = TREE_OPERAND (cbase, 0);
3547 if (CONVERT_EXPR_P (cstep))
3548 inner_step = TREE_OPERAND (cstep, 0);
3549 else
3550 inner_step = cstep;
3552 inner_type = TREE_TYPE (inner_base);
3553 /* If the candidate was added from a biv whose type is smaller than
3554 ctype, we know that neither the candidate nor the biv overflows.
3555 In this case, it's safe to skip the conversion in the candidate.
3556 As an example, (unsigned short)((unsigned long)A) equals
3557 (unsigned short)A, if A has a type no larger than short. */
3558 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3560 cbase = inner_base;
3561 cstep = inner_step;
3564 cstep = fold_convert (uutype, cstep);
3565 cbase = fold_convert (uutype, cbase);
3566 var = fold_convert (uutype, var);
3569 if (!constant_multiple_of (ustep, cstep, &rat))
3570 return false;
3572 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3573 type, we achieve better folding by computing their difference in this
3574 wider type, and casting the result to UUTYPE. We do not need to worry about
3575 overflows, as all the arithmetic will in the end be performed in UUTYPE
3576 anyway. */
3577 common_type = determine_common_wider_type (&ubase, &cbase);
3579 /* use = ubase - ratio * cbase + ratio * var. */
3580 tree_to_aff_combination (ubase, common_type, aff);
3581 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3582 tree_to_aff_combination (var, uutype, &var_aff);
3584 /* We need to shift the value if we are after the increment. */
3585 if (stmt_after_increment (loop, cand, at))
3587 aff_tree cstep_aff;
3589 if (common_type != uutype)
3590 cstep_common = fold_convert (common_type, cstep);
3591 else
3592 cstep_common = cstep;
3594 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3595 aff_combination_add (&cbase_aff, &cstep_aff);
3598 aff_combination_scale (&cbase_aff, -rat);
3599 aff_combination_add (aff, &cbase_aff);
3600 if (common_type != uutype)
3601 aff_combination_convert (aff, uutype);
3603 aff_combination_scale (&var_aff, rat);
3604 aff_combination_add (aff, &var_aff);
3606 return true;
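/* A standalone numeric check (not GCC internals) of the identity behind
   "use = ubase - ratio * cbase + ratio * var" above.  With use iv
   u(i) = ubase + i * ustep, candidate iv c(i) = cbase + i * cstep and
   ustep = ratio * cstep, substituting var = c(i) reproduces u(i)
   exactly.  */

#include <assert.h>

static void
check_affine_identity (long ubase, long cbase, long cstep,
                       long ratio, long i)
{
  long ustep = ratio * cstep;
  long var = cbase + i * cstep;                /* candidate value */
  long recomputed = ubase - ratio * cbase + ratio * var;
  assert (recomputed == ubase + i * ustep);    /* the use's own value */
}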
3609 /* Return the type of USE. */
3611 static tree
3612 get_use_type (struct iv_use *use)
3614 tree base_type = TREE_TYPE (use->iv->base);
3615 tree type;
3617 if (use->type == USE_ADDRESS)
3619 /* The base_type may be a void pointer. Create a pointer type based on
3620 the mem_ref instead. */
3621 type = build_pointer_type (TREE_TYPE (*use->op_p));
3622 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3623 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3625 else
3626 type = base_type;
3628 return type;
3631 /* Determines the expression by which USE is expressed from induction variable
3632 CAND at statement AT in LOOP. The computation is unshared. */
3634 static tree
3635 get_computation_at (struct loop *loop,
3636 struct iv_use *use, struct iv_cand *cand, gimple *at)
3638 aff_tree aff;
3639 tree type = get_use_type (use);
3641 if (!get_computation_aff (loop, use, cand, at, &aff))
3642 return NULL_TREE;
3643 unshare_aff_combination (&aff);
3644 return fold_convert (type, aff_combination_to_tree (&aff));
3647 /* Determines the expression by which USE is expressed from induction variable
3648 CAND in LOOP. The computation is unshared. */
3650 static tree
3651 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3653 return get_computation_at (loop, use, cand, use->stmt);
3656 /* Adjust the cost COST for being in loop setup rather than loop body.
3657 If we're optimizing for space, the loop setup overhead is constant;
3658 if we're optimizing for speed, amortize it over the per-iteration cost. */
3659 static unsigned
3660 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3662 if (cost == INFTY)
3663 return cost;
3664 else if (optimize_loop_for_speed_p (data->current_loop))
3665 return cost / avg_loop_niter (data->current_loop);
3666 else
3667 return cost;
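/* The effect of the amortization above in numbers, as a standalone
   sketch (not GCC internals): with an average trip count of 10, a setup
   cost of 30 contributes only 3 units per iteration when optimizing for
   speed, while for size the full 30 is charged, since the setup
   instructions exist exactly once regardless of the trip count.  */

static unsigned
adjust_setup_cost_sketch (unsigned cost, unsigned avg_niter, int speed_p)
{
  return speed_p ? cost / avg_niter : cost;
}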
3670 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3671 validity for a memory reference accessing memory of mode MODE in
3672 address space AS. */
3675 bool
3676 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3677 addr_space_t as)
3679 #define MAX_RATIO 128
3680 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3681 static vec<sbitmap> valid_mult_list;
3682 sbitmap valid_mult;
3684 if (data_index >= valid_mult_list.length ())
3685 valid_mult_list.safe_grow_cleared (data_index + 1);
3687 valid_mult = valid_mult_list[data_index];
3688 if (!valid_mult)
3690 machine_mode address_mode = targetm.addr_space.address_mode (as);
3691 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3692 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3693 rtx addr, scaled;
3694 HOST_WIDE_INT i;
3696 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3697 bitmap_clear (valid_mult);
3698 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3699 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3700 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3702 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3703 if (memory_address_addr_space_p (mode, addr, as)
3704 || memory_address_addr_space_p (mode, scaled, as))
3705 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3708 if (dump_file && (dump_flags & TDF_DETAILS))
3710 fprintf (dump_file, " allowed multipliers:");
3711 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3712 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3713 fprintf (dump_file, " %d", (int) i);
3714 fprintf (dump_file, "\n");
3715 fprintf (dump_file, "\n");
3718 valid_mult_list[data_index] = valid_mult;
3721 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3722 return false;
3724 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
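/* A standalone sketch (not GCC internals) of the cache above.  Valid
   ratios lie in [-MAX_RATIO, MAX_RATIO], so ratio R is stored at index
   R + MAX_RATIO of a per-(mode, address space) bit array, and later
   queries reduce to a range check plus one bit test.  The array here is
   a plain bool table standing in for the sbitmap.  */

#include <stdbool.h>

#define MAX_RATIO_SKETCH 128

static bool valid_ratio[2 * MAX_RATIO_SKETCH + 1];

static bool
ratio_allowed_sketch (int ratio)
{
  if (ratio < -MAX_RATIO_SKETCH || ratio > MAX_RATIO_SKETCH)
    return false;
  return valid_ratio[ratio + MAX_RATIO_SKETCH];
}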
3727 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3728 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3729 variable is omitted. Compute the cost for a memory reference that accesses
3730 a memory location of mode MEM_MODE in address space AS.
3732 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3733 size of MEM_MODE / RATIO) is available. To make this determination, we
3734 look at the size of the increment to be made, which is given in CSTEP.
3735 CSTEP may be zero if the step is unknown.
3736 STMT_AFTER_INC is true iff the statement we're looking at is after the
3737 increment of the original biv.
3739 TODO -- there must be some better way. This all is quite crude. */
3741 enum ainc_type
3743 AINC_PRE_INC, /* Pre increment. */
3744 AINC_PRE_DEC, /* Pre decrement. */
3745 AINC_POST_INC, /* Post increment. */
3746 AINC_POST_DEC, /* Post decrement. */
3747 AINC_NONE /* Also the number of auto increment types. */
3750 typedef struct address_cost_data_s
3752 HOST_WIDE_INT min_offset, max_offset;
3753 unsigned costs[2][2][2][2];
3754 unsigned ainc_costs[AINC_NONE];
3755 } *address_cost_data;
3758 static comp_cost
3759 get_address_cost (bool symbol_present, bool var_present,
3760 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3761 HOST_WIDE_INT cstep, machine_mode mem_mode,
3762 addr_space_t as, bool speed,
3763 bool stmt_after_inc, bool *may_autoinc)
3765 machine_mode address_mode = targetm.addr_space.address_mode (as);
3766 static vec<address_cost_data> address_cost_data_list;
3767 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3768 address_cost_data data;
3769 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3770 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3771 unsigned cost, acost, complexity;
3772 enum ainc_type autoinc_type;
3773 bool offset_p, ratio_p, autoinc;
3774 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3775 unsigned HOST_WIDE_INT mask;
3776 unsigned bits;
3778 if (data_index >= address_cost_data_list.length ())
3779 address_cost_data_list.safe_grow_cleared (data_index + 1);
3781 data = address_cost_data_list[data_index];
3782 if (!data)
3784 HOST_WIDE_INT i;
3785 HOST_WIDE_INT rat, off = 0;
3786 int old_cse_not_expected, width;
3787 unsigned sym_p, var_p, off_p, rat_p, add_c;
3788 rtx_insn *seq;
3789 rtx addr, base;
3790 rtx reg0, reg1;
3792 data = (address_cost_data) xcalloc (1, sizeof (*data));
3794 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3796 width = GET_MODE_BITSIZE (address_mode) - 1;
3797 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3798 width = HOST_BITS_PER_WIDE_INT - 1;
3799 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3801 for (i = width; i >= 0; i--)
3803 off = -((unsigned HOST_WIDE_INT) 1 << i);
3804 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3805 if (memory_address_addr_space_p (mem_mode, addr, as))
3806 break;
3808 data->min_offset = (i == -1? 0 : off);
3810 for (i = width; i >= 0; i--)
3812 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3813 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3814 if (memory_address_addr_space_p (mem_mode, addr, as))
3815 break;
3816 /* For some strict-alignment targets, the offset must be naturally
3817 aligned. Try an aligned offset if mem_mode is not QImode. */
3818 off = mem_mode != QImode
3819 ? ((unsigned HOST_WIDE_INT) 1 << i)
3820 - GET_MODE_SIZE (mem_mode)
3821 : 0;
3822 if (off > 0)
3824 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3825 if (memory_address_addr_space_p (mem_mode, addr, as))
3826 break;
3829 if (i == -1)
3830 off = 0;
3831 data->max_offset = off;
3833 if (dump_file && (dump_flags & TDF_DETAILS))
3835 fprintf (dump_file, "get_address_cost:\n");
3836 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3837 GET_MODE_NAME (mem_mode),
3838 data->min_offset);
3839 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3840 GET_MODE_NAME (mem_mode),
3841 data->max_offset);
3844 rat = 1;
3845 for (i = 2; i <= MAX_RATIO; i++)
3846 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3848 rat = i;
3849 break;
3852 /* Compute the cost of various addressing modes. */
3853 acost = 0;
3854 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3855 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3857 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3858 || USE_STORE_PRE_DECREMENT (mem_mode))
3860 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3861 has_predec[mem_mode]
3862 = memory_address_addr_space_p (mem_mode, addr, as);
3864 if (has_predec[mem_mode])
3865 data->ainc_costs[AINC_PRE_DEC]
3866 = address_cost (addr, mem_mode, as, speed);
3868 if (USE_LOAD_POST_DECREMENT (mem_mode)
3869 || USE_STORE_POST_DECREMENT (mem_mode))
3871 addr = gen_rtx_POST_DEC (address_mode, reg0);
3872 has_postdec[mem_mode]
3873 = memory_address_addr_space_p (mem_mode, addr, as);
3875 if (has_postdec[mem_mode])
3876 data->ainc_costs[AINC_POST_DEC]
3877 = address_cost (addr, mem_mode, as, speed);
3879 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3880 || USE_STORE_PRE_INCREMENT (mem_mode))
3882 addr = gen_rtx_PRE_INC (address_mode, reg0);
3883 has_preinc[mem_mode]
3884 = memory_address_addr_space_p (mem_mode, addr, as);
3886 if (has_preinc[mem_mode])
3887 data->ainc_costs[AINC_PRE_INC]
3888 = address_cost (addr, mem_mode, as, speed);
3890 if (USE_LOAD_POST_INCREMENT (mem_mode)
3891 || USE_STORE_POST_INCREMENT (mem_mode))
3893 addr = gen_rtx_POST_INC (address_mode, reg0);
3894 has_postinc[mem_mode]
3895 = memory_address_addr_space_p (mem_mode, addr, as);
3897 if (has_postinc[mem_mode])
3898 data->ainc_costs[AINC_POST_INC]
3899 = address_cost (addr, mem_mode, as, speed);
3901 for (i = 0; i < 16; i++)
3903 sym_p = i & 1;
3904 var_p = (i >> 1) & 1;
3905 off_p = (i >> 2) & 1;
3906 rat_p = (i >> 3) & 1;
3908 addr = reg0;
3909 if (rat_p)
3910 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3911 gen_int_mode (rat, address_mode));
3913 if (var_p)
3914 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3916 if (sym_p)
3918 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3919 /* ??? We can run into trouble with some backends by presenting
3920 it with symbols which haven't been properly passed through
3921 targetm.encode_section_info. By setting the local bit, we
3922 enhance the probability of things working. */
3923 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3925 if (off_p)
3926 base = gen_rtx_fmt_e (CONST, address_mode,
3927 gen_rtx_fmt_ee
3928 (PLUS, address_mode, base,
3929 gen_int_mode (off, address_mode)));
3931 else if (off_p)
3932 base = gen_int_mode (off, address_mode);
3933 else
3934 base = NULL_RTX;
3936 if (base)
3937 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3939 start_sequence ();
3940 /* To avoid splitting addressing modes, pretend that no cse will
3941 follow. */
3942 old_cse_not_expected = cse_not_expected;
3943 cse_not_expected = true;
3944 addr = memory_address_addr_space (mem_mode, addr, as);
3945 cse_not_expected = old_cse_not_expected;
3946 seq = get_insns ();
3947 end_sequence ();
3949 acost = seq_cost (seq, speed);
3950 acost += address_cost (addr, mem_mode, as, speed);
3952 if (!acost)
3953 acost = 1;
3954 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3957 /* On some targets, it is quite expensive to load symbol to a register,
3958 which makes addresses that contain symbols look much more expensive.
3959 However, the symbol will have to be loaded in any case before the
3960 loop (and quite likely we have it in register already), so it does not
3961 make much sense to penalize them too heavily. So make some final
3962 tweaks for the SYMBOL_PRESENT modes:
3964 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3965 var is cheaper, use this mode with small penalty.
3966 If VAR_PRESENT is true, try whether the mode with
3967 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3968 if this is the case, use it. */
3969 add_c = add_cost (speed, address_mode);
3970 for (i = 0; i < 8; i++)
3972 var_p = i & 1;
3973 off_p = (i >> 1) & 1;
3974 rat_p = (i >> 2) & 1;
3976 acost = data->costs[0][1][off_p][rat_p] + 1;
3977 if (var_p)
3978 acost += add_c;
3980 if (acost < data->costs[1][var_p][off_p][rat_p])
3981 data->costs[1][var_p][off_p][rat_p] = acost;
3984 if (dump_file && (dump_flags & TDF_DETAILS))
3986 fprintf (dump_file, "Address costs:\n");
3988 for (i = 0; i < 16; i++)
3990 sym_p = i & 1;
3991 var_p = (i >> 1) & 1;
3992 off_p = (i >> 2) & 1;
3993 rat_p = (i >> 3) & 1;
3995 fprintf (dump_file, " ");
3996 if (sym_p)
3997 fprintf (dump_file, "sym + ");
3998 if (var_p)
3999 fprintf (dump_file, "var + ");
4000 if (off_p)
4001 fprintf (dump_file, "cst + ");
4002 if (rat_p)
4003 fprintf (dump_file, "rat * ");
4005 acost = data->costs[sym_p][var_p][off_p][rat_p];
4006 fprintf (dump_file, "index costs %d\n", acost);
4008 if (has_predec[mem_mode] || has_postdec[mem_mode]
4009 || has_preinc[mem_mode] || has_postinc[mem_mode])
4010 fprintf (dump_file, " May include autoinc/dec\n");
4011 fprintf (dump_file, "\n");
4014 address_cost_data_list[data_index] = data;
4017 bits = GET_MODE_BITSIZE (address_mode);
4018 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4019 offset &= mask;
4020 if ((offset >> (bits - 1) & 1))
4021 offset |= ~mask;
4022 s_offset = offset;
4024 autoinc = false;
4025 autoinc_type = AINC_NONE;
4026 msize = GET_MODE_SIZE (mem_mode);
4027 autoinc_offset = offset;
4028 if (stmt_after_inc)
4029 autoinc_offset += ratio * cstep;
4030 if (symbol_present || var_present || ratio != 1)
4031 autoinc = false;
4032 else
4034 if (has_postinc[mem_mode] && autoinc_offset == 0
4035 && msize == cstep)
4036 autoinc_type = AINC_POST_INC;
4037 else if (has_postdec[mem_mode] && autoinc_offset == 0
4038 && msize == -cstep)
4039 autoinc_type = AINC_POST_DEC;
4040 else if (has_preinc[mem_mode] && autoinc_offset == msize
4041 && msize == cstep)
4042 autoinc_type = AINC_PRE_INC;
4043 else if (has_predec[mem_mode] && autoinc_offset == -msize
4044 && msize == -cstep)
4045 autoinc_type = AINC_PRE_DEC;
4047 if (autoinc_type != AINC_NONE)
4048 autoinc = true;
4051 cost = 0;
4052 offset_p = (s_offset != 0
4053 && data->min_offset <= s_offset
4054 && s_offset <= data->max_offset);
4055 ratio_p = (ratio != 1
4056 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4058 if (ratio != 1 && !ratio_p)
4059 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4061 if (s_offset && !offset_p && !symbol_present)
4062 cost += add_cost (speed, address_mode);
4064 if (may_autoinc)
4065 *may_autoinc = autoinc;
4066 if (autoinc)
4067 acost = data->ainc_costs[autoinc_type];
4068 else
4069 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4070 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4071 return new_cost (cost + acost, complexity);
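/* The sign-extension idiom used near the top of the function above,
   isolated as a standalone sketch (not GCC internals).  The offset is
   masked down to the BITS-wide address width; if its sign bit is then
   set, the high host bits are filled back in so host arithmetic sees
   the same negative value the target would.  The double shift when
   building MASK avoids shifting by the full word width, which would be
   undefined.  */

static long
sign_extend_sketch (unsigned long x, int bits)
{
  unsigned long mask = ~(~0UL << (bits - 1) << 1);  /* low BITS bits set */
  x &= mask;
  if ((x >> (bits - 1)) & 1)
    x |= ~mask;                                     /* propagate sign */
  return (long) x;
}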
4074 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
4075 the EXPR operand holding the multiplication implemented by the shift.
4076 COST0 and COST1 are the costs for calculating the operands of EXPR.
4077 Returns true if successful, and returns the cost in COST. */
4079 static bool
4080 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4081 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4083 comp_cost res;
4084 tree op1 = TREE_OPERAND (expr, 1);
4085 tree cst = TREE_OPERAND (mult, 1);
4086 tree multop = TREE_OPERAND (mult, 0);
4087 int m = exact_log2 (int_cst_value (cst));
4088 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4089 int as_cost, sa_cost;
4090 bool mult_in_op1;
4092 if (!(m >= 0 && m < maxm))
4093 return false;
4095 STRIP_NOPS (op1);
4096 mult_in_op1 = operand_equal_p (op1, mult, 0);
4098 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4100 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4101 use that in preference to a shift insn followed by an add insn. */
4102 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4103 ? shiftadd_cost (speed, mode, m)
4104 : (mult_in_op1
4105 ? shiftsub1_cost (speed, mode, m)
4106 : shiftsub0_cost (speed, mode, m)));
4108 res = new_cost (MIN (as_cost, sa_cost), 0);
4109 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4111 STRIP_NOPS (multop);
4112 if (!is_gimple_val (multop))
4113 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4115 *cost = res;
4116 return true;
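/* A standalone illustration (not GCC internals) of the rewrite whose
   cost is estimated above.  When the multiplier is a power of two,
   A + B * 8 can be emitted as one shift-and-add, A + (B << 3), and
   A - B * 8 as the matching shift-and-subtract; M is exact_log2 of the
   constant.  */

static long
shiftadd_sketch (long a, long b, int m, int minus_p)
{
  return minus_p ? a - (b << m) : a + (b << m);
}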
4119 /* Estimates cost of forcing expression EXPR into a variable. */
4121 static comp_cost
4122 force_expr_to_var_cost (tree expr, bool speed)
4124 static bool costs_initialized = false;
4125 static unsigned integer_cost [2];
4126 static unsigned symbol_cost [2];
4127 static unsigned address_cost [2];
4128 tree op0, op1;
4129 comp_cost cost0, cost1, cost;
4130 machine_mode mode;
4132 if (!costs_initialized)
4134 tree type = build_pointer_type (integer_type_node);
4135 tree var, addr;
4136 rtx x;
4137 int i;
4139 var = create_tmp_var_raw (integer_type_node, "test_var");
4140 TREE_STATIC (var) = 1;
4141 x = produce_memory_decl_rtl (var, NULL);
4142 SET_DECL_RTL (var, x);
4144 addr = build1 (ADDR_EXPR, type, var);
4147 for (i = 0; i < 2; i++)
4149 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4150 2000), i);
4152 symbol_cost[i] = computation_cost (addr, i) + 1;
4154 address_cost[i]
4155 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4156 if (dump_file && (dump_flags & TDF_DETAILS))
4158 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4159 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4160 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4161 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4162 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4163 fprintf (dump_file, "\n");
4167 costs_initialized = true;
4170 STRIP_NOPS (expr);
4172 if (SSA_VAR_P (expr))
4173 return no_cost;
4175 if (is_gimple_min_invariant (expr))
4177 if (TREE_CODE (expr) == INTEGER_CST)
4178 return new_cost (integer_cost [speed], 0);
4180 if (TREE_CODE (expr) == ADDR_EXPR)
4182 tree obj = TREE_OPERAND (expr, 0);
4184 if (TREE_CODE (obj) == VAR_DECL
4185 || TREE_CODE (obj) == PARM_DECL
4186 || TREE_CODE (obj) == RESULT_DECL)
4187 return new_cost (symbol_cost [speed], 0);
4190 return new_cost (address_cost [speed], 0);
4193 switch (TREE_CODE (expr))
4195 case POINTER_PLUS_EXPR:
4196 case PLUS_EXPR:
4197 case MINUS_EXPR:
4198 case MULT_EXPR:
4199 op0 = TREE_OPERAND (expr, 0);
4200 op1 = TREE_OPERAND (expr, 1);
4201 STRIP_NOPS (op0);
4202 STRIP_NOPS (op1);
4203 break;
4205 CASE_CONVERT:
4206 case NEGATE_EXPR:
4207 op0 = TREE_OPERAND (expr, 0);
4208 STRIP_NOPS (op0);
4209 op1 = NULL_TREE;
4210 break;
4212 default:
4213 /* Just an arbitrary value, FIXME. */
4214 return new_cost (target_spill_cost[speed], 0);
4217 if (op0 == NULL_TREE
4218 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4219 cost0 = no_cost;
4220 else
4221 cost0 = force_expr_to_var_cost (op0, speed);
4223 if (op1 == NULL_TREE
4224 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4225 cost1 = no_cost;
4226 else
4227 cost1 = force_expr_to_var_cost (op1, speed);
4229 mode = TYPE_MODE (TREE_TYPE (expr));
4230 switch (TREE_CODE (expr))
4232 case POINTER_PLUS_EXPR:
4233 case PLUS_EXPR:
4234 case MINUS_EXPR:
4235 case NEGATE_EXPR:
4236 cost = new_cost (add_cost (speed, mode), 0);
4237 if (TREE_CODE (expr) != NEGATE_EXPR)
4239 tree mult = NULL_TREE;
4240 comp_cost sa_cost;
4241 if (TREE_CODE (op1) == MULT_EXPR)
4242 mult = op1;
4243 else if (TREE_CODE (op0) == MULT_EXPR)
4244 mult = op0;
4246 if (mult != NULL_TREE
4247 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4248 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4249 speed, &sa_cost))
4250 return sa_cost;
4252 break;
4254 CASE_CONVERT:
4256 tree inner_mode, outer_mode;
4257 outer_mode = TREE_TYPE (expr);
4258 inner_mode = TREE_TYPE (op0);
4259 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4260 TYPE_MODE (inner_mode), speed), 0);
4262 break;
4264 case MULT_EXPR:
4265 if (cst_and_fits_in_hwi (op0))
4266 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4267 mode, speed), 0);
4268 else if (cst_and_fits_in_hwi (op1))
4269 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4270 mode, speed), 0);
4271 else
4272 return new_cost (target_spill_cost [speed], 0);
4273 break;
4275 default:
4276 gcc_unreachable ();
4279 cost = add_costs (cost, cost0);
4280 cost = add_costs (cost, cost1);
4282 /* Bound the cost by target_spill_cost. The parts of complicated
4283 computations often are either loop invariant or at least can
4284 be shared between several iv uses, so letting this grow without
4285 limits would not give reasonable results. */
4286 if (cost.cost > (int) target_spill_cost [speed])
4287 cost.cost = target_spill_cost [speed];
4289 return cost;
4292 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4293 invariants the computation depends on. */
4295 static comp_cost
4296 force_var_cost (struct ivopts_data *data,
4297 tree expr, bitmap *depends_on)
4299 if (depends_on)
4301 fd_ivopts_data = data;
4302 walk_tree (&expr, find_depends, depends_on, NULL);
4305 return force_expr_to_var_cost (expr, data->speed);
4308 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4309 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4310 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4311 invariants the computation depends on. */
4313 static comp_cost
4314 split_address_cost (struct ivopts_data *data,
4315 tree addr, bool *symbol_present, bool *var_present,
4316 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4318 tree core;
4319 HOST_WIDE_INT bitsize;
4320 HOST_WIDE_INT bitpos;
4321 tree toffset;
4322 machine_mode mode;
4323 int unsignedp, volatilep;
4325 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4326 &unsignedp, &volatilep, false);
4328 if (toffset != 0
4329 || bitpos % BITS_PER_UNIT != 0
4330 || TREE_CODE (core) != VAR_DECL)
4332 *symbol_present = false;
4333 *var_present = true;
4334 fd_ivopts_data = data;
4335 walk_tree (&addr, find_depends, depends_on, NULL);
4336 return new_cost (target_spill_cost[data->speed], 0);
4339 *offset += bitpos / BITS_PER_UNIT;
4340 if (TREE_STATIC (core)
4341 || DECL_EXTERNAL (core))
4343 *symbol_present = true;
4344 *var_present = false;
4345 return no_cost;
4348 *symbol_present = false;
4349 *var_present = true;
4350 return no_cost;
4353 /* Estimates cost of expressing difference of addresses E1 - E2 as
4354 var + symbol + offset. The value of offset is added to OFFSET,
4355 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4356 part is missing. DEPENDS_ON is a set of the invariants the computation
4357 depends on. */
4359 static comp_cost
4360 ptr_difference_cost (struct ivopts_data *data,
4361 tree e1, tree e2, bool *symbol_present, bool *var_present,
4362 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4364 HOST_WIDE_INT diff = 0;
4365 aff_tree aff_e1, aff_e2;
4366 tree type;
4368 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4370 if (ptr_difference_const (e1, e2, &diff))
4372 *offset += diff;
4373 *symbol_present = false;
4374 *var_present = false;
4375 return no_cost;
4378 if (integer_zerop (e2))
4379 return split_address_cost (data, TREE_OPERAND (e1, 0),
4380 symbol_present, var_present, offset, depends_on);
4382 *symbol_present = false;
4383 *var_present = true;
4385 type = signed_type_for (TREE_TYPE (e1));
4386 tree_to_aff_combination (e1, type, &aff_e1);
4387 tree_to_aff_combination (e2, type, &aff_e2);
4388 aff_combination_scale (&aff_e2, -1);
4389 aff_combination_add (&aff_e1, &aff_e2);
4391 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4394 /* Estimates cost of expressing difference E1 - E2 as
4395 var + symbol + offset. The value of offset is added to OFFSET,
4396 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4397 part is missing. DEPENDS_ON is a set of the invariants the computation
4398 depends on. */
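/* For example, for E1 = p + 32 and E2 = p + 8 the constant offsets are
   stripped, OFFSET is increased by 24, and because the remaining bases
   are equal no variable part is left, so the difference costs nothing
   at runtime.  */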
4400 static comp_cost
4401 difference_cost (struct ivopts_data *data,
4402 tree e1, tree e2, bool *symbol_present, bool *var_present,
4403 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4405 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4406 unsigned HOST_WIDE_INT off1, off2;
4407 aff_tree aff_e1, aff_e2;
4408 tree type;
4410 e1 = strip_offset (e1, &off1);
4411 e2 = strip_offset (e2, &off2);
4412 *offset += off1 - off2;
4414 STRIP_NOPS (e1);
4415 STRIP_NOPS (e2);
4417 if (TREE_CODE (e1) == ADDR_EXPR)
4418 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4419 offset, depends_on);
4420 *symbol_present = false;
4422 if (operand_equal_p (e1, e2, 0))
4424 *var_present = false;
4425 return no_cost;
4428 *var_present = true;
4430 if (integer_zerop (e2))
4431 return force_var_cost (data, e1, depends_on);
4433 if (integer_zerop (e1))
4435 comp_cost cost = force_var_cost (data, e2, depends_on);
4436 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4437 return cost;
4440 type = signed_type_for (TREE_TYPE (e1));
4441 tree_to_aff_combination (e1, type, &aff_e1);
4442 tree_to_aff_combination (e2, type, &aff_e2);
4443 aff_combination_scale (&aff_e2, -1);
4444 aff_combination_add (&aff_e1, &aff_e2);
4446 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4449 /* Returns true if AFF1 and AFF2 are identical. */
4451 static bool
4452 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4454 unsigned i;
4456 if (aff1->n != aff2->n)
4457 return false;
4459 for (i = 0; i < aff1->n; i++)
4461 if (aff1->elts[i].coef != aff2->elts[i].coef)
4462 return false;
4464 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4465 return false;
4467 return true;
4470 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4472 static int
4473 get_expr_id (struct ivopts_data *data, tree expr)
4475 struct iv_inv_expr_ent ent;
4476 struct iv_inv_expr_ent **slot;
4478 ent.expr = expr;
4479 ent.hash = iterative_hash_expr (expr, 0);
4480 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4481 if (*slot)
4482 return (*slot)->id;
4484 *slot = XNEW (struct iv_inv_expr_ent);
4485 (*slot)->expr = expr;
4486 (*slot)->hash = ent.hash;
4487 (*slot)->id = data->inv_expr_id++;
4488 return (*slot)->id;
4491 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4492 requires a new compiler generated temporary. Returns -1 otherwise.
4493 ADDRESS_P is a flag indicating if the expression is for address
4494 computation. */
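/* For instance, with RATIO == 1, UBASE == p + 4 and CBASE == p match
   once the constant part is stripped, so -1 is returned; with UBASE == q
   and CBASE == p the difference q - p is a loop invariant, and an id is
   assigned to it so that its setup cost is counted only once.  */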
4496 static int
4497 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4498 tree cbase, HOST_WIDE_INT ratio,
4499 bool address_p)
4501 aff_tree ubase_aff, cbase_aff;
4502 tree expr, ub, cb;
4504 STRIP_NOPS (ubase);
4505 STRIP_NOPS (cbase);
4506 ub = ubase;
4507 cb = cbase;
4509 if ((TREE_CODE (ubase) == INTEGER_CST)
4510 && (TREE_CODE (cbase) == INTEGER_CST))
4511 return -1;
4513 /* Strips the constant part. */
4514 if (TREE_CODE (ubase) == PLUS_EXPR
4515 || TREE_CODE (ubase) == MINUS_EXPR
4516 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4518 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4519 ubase = TREE_OPERAND (ubase, 0);
4522 /* Strips the constant part. */
4523 if (TREE_CODE (cbase) == PLUS_EXPR
4524 || TREE_CODE (cbase) == MINUS_EXPR
4525 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4527 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4528 cbase = TREE_OPERAND (cbase, 0);
4531 if (address_p)
4533 if (((TREE_CODE (ubase) == SSA_NAME)
4534 || (TREE_CODE (ubase) == ADDR_EXPR
4535 && is_gimple_min_invariant (ubase)))
4536 && (TREE_CODE (cbase) == INTEGER_CST))
4537 return -1;
4539 if (((TREE_CODE (cbase) == SSA_NAME)
4540 || (TREE_CODE (cbase) == ADDR_EXPR
4541 && is_gimple_min_invariant (cbase)))
4542 && (TREE_CODE (ubase) == INTEGER_CST))
4543 return -1;
4546 if (ratio == 1)
4548 if (operand_equal_p (ubase, cbase, 0))
4549 return -1;
4551 if (TREE_CODE (ubase) == ADDR_EXPR
4552 && TREE_CODE (cbase) == ADDR_EXPR)
4554 tree usym, csym;
4556 usym = TREE_OPERAND (ubase, 0);
4557 csym = TREE_OPERAND (cbase, 0);
4558 if (TREE_CODE (usym) == ARRAY_REF)
4560 tree ind = TREE_OPERAND (usym, 1);
4561 if (TREE_CODE (ind) == INTEGER_CST
4562 && tree_fits_shwi_p (ind)
4563 && tree_to_shwi (ind) == 0)
4564 usym = TREE_OPERAND (usym, 0);
4566 if (TREE_CODE (csym) == ARRAY_REF)
4568 tree ind = TREE_OPERAND (csym, 1);
4569 if (TREE_CODE (ind) == INTEGER_CST
4570 && tree_fits_shwi_p (ind)
4571 && tree_to_shwi (ind) == 0)
4572 csym = TREE_OPERAND (csym, 0);
4574 if (operand_equal_p (usym, csym, 0))
4575 return -1;
4577 /* Now do a more complex comparison.  */
4578 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4579 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4580 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4581 return -1;
4584 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4585 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4587 aff_combination_scale (&cbase_aff, -1 * ratio);
4588 aff_combination_add (&ubase_aff, &cbase_aff);
4589 expr = aff_combination_to_tree (&ubase_aff);
4590 return get_expr_id (data, expr);
4595 /* Determines the cost of the computation by which USE is expressed
4596 from induction variable CAND.  If ADDRESS_P is true, we just need
4597 to create an address from it, otherwise we want to get it into a
4598 register.  A set of invariants we depend on is stored in
4599 DEPENDS_ON.  AT is the statement at which the value is computed.
4600 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4601 addressing is likely. */
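/* As a sketch: a use with base ub and step 8 expressed from a candidate
   with base cb and step 4 gets RATIO == 2, so its value is
   ub - 2 * cb + 2 * cand.  The invariant part ub - 2 * cb is costed once
   per loop (divided by the average iteration count), while the multiply
   and add involving the candidate are paid on every iteration.  */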
4603 static comp_cost
4604 get_computation_cost_at (struct ivopts_data *data,
4605 struct iv_use *use, struct iv_cand *cand,
4606 bool address_p, bitmap *depends_on, gimple *at,
4607 bool *can_autoinc,
4608 int *inv_expr_id)
4610 tree ubase = use->iv->base, ustep = use->iv->step;
4611 tree cbase, cstep;
4612 tree utype = TREE_TYPE (ubase), ctype;
4613 unsigned HOST_WIDE_INT cstepi, offset = 0;
4614 HOST_WIDE_INT ratio, aratio;
4615 bool var_present, symbol_present, stmt_is_after_inc;
4616 comp_cost cost;
4617 widest_int rat;
4618 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4619 machine_mode mem_mode = (address_p
4620 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4621 : VOIDmode);
4623 *depends_on = NULL;
4625 /* Only consider real candidates. */
4626 if (!cand->iv)
4627 return infinite_cost;
4629 cbase = cand->iv->base;
4630 cstep = cand->iv->step;
4631 ctype = TREE_TYPE (cbase);
4633 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4635 /* We do not have enough precision to express the values of USE.  */
4636 return infinite_cost;
4639 if (address_p
4640 || (use->iv->base_object
4641 && cand->iv->base_object
4642 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4643 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4645 /* Do not try to express address of an object with computation based
4646 on address of a different object. This may cause problems in rtl
4647 level alias analysis (that does not expect this to be happening,
4648 as this is illegal in C), and would be unlikely to be useful
4649 anyway. */
4650 if (use->iv->base_object
4651 && cand->iv->base_object
4652 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4653 return infinite_cost;
4656 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4658 /* TODO -- add direct handling of this case. */
4659 goto fallback;
4662 /* CSTEPI is removed from the offset in case the statement is after the
4663 increment.  If the step is not constant, we use zero instead.
4664 This is a bit imprecise (there is the extra addition), but
4665 redundancy elimination is likely to transform the code so that
4666 it uses the value of the variable before the increment anyway,
4667 so this is not too unrealistic.  */
4668 if (cst_and_fits_in_hwi (cstep))
4669 cstepi = int_cst_value (cstep);
4670 else
4671 cstepi = 0;
4673 if (!constant_multiple_of (ustep, cstep, &rat))
4674 return infinite_cost;
4676 if (wi::fits_shwi_p (rat))
4677 ratio = rat.to_shwi ();
4678 else
4679 return infinite_cost;
4681 STRIP_NOPS (cbase);
4682 ctype = TREE_TYPE (cbase);
4684 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4686 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4687 or ratio == 1, it is better to handle this like
4689 ubase - ratio * cbase + ratio * var
4691 (this also holds in the case ratio == -1, TODO).  */
4693 if (cst_and_fits_in_hwi (cbase))
4695 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4696 cost = difference_cost (data,
4697 ubase, build_int_cst (utype, 0),
4698 &symbol_present, &var_present, &offset,
4699 depends_on);
4700 cost.cost /= avg_loop_niter (data->current_loop);
4702 else if (ratio == 1)
4704 tree real_cbase = cbase;
4706 /* Check to see if any adjustment is needed. */
4707 if (cstepi == 0 && stmt_is_after_inc)
4709 aff_tree real_cbase_aff;
4710 aff_tree cstep_aff;
4712 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4713 &real_cbase_aff);
4714 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4716 aff_combination_add (&real_cbase_aff, &cstep_aff);
4717 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4720 cost = difference_cost (data,
4721 ubase, real_cbase,
4722 &symbol_present, &var_present, &offset,
4723 depends_on);
4724 cost.cost /= avg_loop_niter (data->current_loop);
4726 else if (address_p
4727 && !POINTER_TYPE_P (ctype)
4728 && multiplier_allowed_in_address_p
4729 (ratio, mem_mode,
4730 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4732 if (cstepi == 0 && stmt_is_after_inc)
4734 if (POINTER_TYPE_P (ctype))
4735 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4736 else
4737 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4739 cbase
4740 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4741 cost = difference_cost (data,
4742 ubase, cbase,
4743 &symbol_present, &var_present, &offset,
4744 depends_on);
4745 cost.cost /= avg_loop_niter (data->current_loop);
4747 else
4749 cost = force_var_cost (data, cbase, depends_on);
4750 cost = add_costs (cost,
4751 difference_cost (data,
4752 ubase, build_int_cst (utype, 0),
4753 &symbol_present, &var_present,
4754 &offset, depends_on));
4755 cost.cost /= avg_loop_niter (data->current_loop);
4756 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4759 /* The set of invariants depended on by a sub-use has already been computed
4760 for the first use in the group.  */
4761 if (use->sub_id)
4763 cost.cost = 0;
4764 if (depends_on && *depends_on)
4765 bitmap_clear (*depends_on);
4767 else if (inv_expr_id)
4769 *inv_expr_id =
4770 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4771 /* Clear depends on. */
4772 if (*inv_expr_id != -1 && depends_on && *depends_on)
4773 bitmap_clear (*depends_on);
4776 /* If we are after the increment, the value of the candidate is higher by
4777 one iteration. */
4778 if (stmt_is_after_inc)
4779 offset -= ratio * cstepi;
4781 /* Now the computation has the shape symbol + var1 + const + ratio * var2.
4782 (symbol/var1/const parts may be omitted). If we are looking for an
4783 address, find the cost of addressing this. */
4784 if (address_p)
4785 return add_costs (cost,
4786 get_address_cost (symbol_present, var_present,
4787 offset, ratio, cstepi,
4788 mem_mode,
4789 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4790 speed, stmt_is_after_inc,
4791 can_autoinc));
4793 /* Otherwise estimate the costs for computing the expression. */
4794 if (!symbol_present && !var_present && !offset)
4796 if (ratio != 1)
4797 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4798 return cost;
4801 /* Symbol + offset should be compile-time computable, so consider that they
4802 are added once to the variable, if present. */
4803 if (var_present && (symbol_present || offset))
4804 cost.cost += adjust_setup_cost (data,
4805 add_cost (speed, TYPE_MODE (ctype)));
4807 /* Having offset does not affect runtime cost in case it is added to
4808 symbol, but it increases complexity. */
4809 if (offset)
4810 cost.complexity++;
4812 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4814 aratio = ratio > 0 ? ratio : -ratio;
4815 if (aratio != 1)
4816 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4817 return cost;
4819 fallback:
4820 if (can_autoinc)
4821 *can_autoinc = false;
4824 /* Just get the expression, expand it and measure the cost. */
4825 tree comp = get_computation_at (data->current_loop, use, cand, at);
4827 if (!comp)
4828 return infinite_cost;
4830 if (address_p)
4831 comp = build_simple_mem_ref (comp);
4833 return new_cost (computation_cost (comp, speed), 0);
4837 /* Determines the cost of the computation by which USE is expressed
4838 from induction variable CAND.  If ADDRESS_P is true, we just need
4839 to create an address from it, otherwise we want to get it into a
4840 register.  A set of invariants we depend on is stored in
4841 DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4842 autoinc addressing is likely. */
4844 static comp_cost
4845 get_computation_cost (struct ivopts_data *data,
4846 struct iv_use *use, struct iv_cand *cand,
4847 bool address_p, bitmap *depends_on,
4848 bool *can_autoinc, int *inv_expr_id)
4850 return get_computation_cost_at (data,
4851 use, cand, address_p, depends_on, use->stmt,
4852 can_autoinc, inv_expr_id);
4855 /* Determines cost of basing replacement of USE on CAND in a generic
4856 expression. */
4858 static bool
4859 determine_use_iv_cost_generic (struct ivopts_data *data,
4860 struct iv_use *use, struct iv_cand *cand)
4862 bitmap depends_on;
4863 comp_cost cost;
4864 int inv_expr_id = -1;
4866 /* The simple case first -- if we need to express the value of the preserved
4867 original biv, the cost is 0. This also prevents us from counting the
4868 cost of increment twice -- once at this use and once in the cost of
4869 the candidate. */
4870 if (cand->pos == IP_ORIGINAL
4871 && cand->incremented_at == use->stmt)
4873 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4874 ERROR_MARK, -1);
4875 return true;
4878 cost = get_computation_cost (data, use, cand, false, &depends_on,
4879 NULL, &inv_expr_id);
4881 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4882 inv_expr_id);
4884 return !infinite_cost_p (cost);
4887 /* Determines cost of basing replacement of USE on CAND in an address. */
4889 static bool
4890 determine_use_iv_cost_address (struct ivopts_data *data,
4891 struct iv_use *use, struct iv_cand *cand)
4893 bitmap depends_on;
4894 bool can_autoinc;
4895 int inv_expr_id = -1;
4896 struct iv_use *sub_use;
4897 comp_cost sub_cost;
4898 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4899 &can_autoinc, &inv_expr_id);
4901 if (cand->ainc_use == use)
4903 if (can_autoinc)
4904 cost.cost -= cand->cost_step;
4905 /* If we generated the candidate solely for exploiting autoincrement
4906 opportunities, and it turns out it can't be used, set the cost to
4907 infinity to make sure we ignore it. */
4908 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4909 cost = infinite_cost;
4911 for (sub_use = use->next;
4912 sub_use && !infinite_cost_p (cost);
4913 sub_use = sub_use->next)
4915 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4916 &can_autoinc, &inv_expr_id);
4917 cost = add_costs (cost, sub_cost);
4920 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4921 inv_expr_id);
4923 return !infinite_cost_p (cost);
4926 /* Computes value of candidate CAND at position AT in iteration NITER, and
4927 stores it to VAL. */
4929 static void
4930 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4931 aff_tree *val)
4933 aff_tree step, delta, nit;
4934 struct iv *iv = cand->iv;
4935 tree type = TREE_TYPE (iv->base);
4936 tree steptype = unsigned_type_for (type);
4941 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4942 aff_combination_convert (&step, steptype);
4943 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4944 aff_combination_convert (&nit, steptype);
4945 aff_combination_mult (&nit, &step, &delta);
4946 if (stmt_after_increment (loop, cand, at))
4947 aff_combination_add (&delta, &step);
4949 tree_to_aff_combination (iv->base, type, val);
4950 if (!POINTER_TYPE_P (type))
4951 aff_combination_convert (val, steptype);
4952 aff_combination_add (val, &delta);
4955 /* Returns period of induction variable iv. */
4957 static tree
4958 iv_period (struct iv *iv)
4960 tree step = iv->step, period, type;
4961 tree pow2div;
4963 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4965 type = unsigned_type_for (TREE_TYPE (step));
4966 /* Period of the iv is lcm (step, type_range)/step - 1,
4967 i.e., N*type_range/step - 1.  Since type_range is a power
4968 of two, N == step >> num_of_ending_zeros_binary (step),
4969 so the final result is
4971 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
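/* For instance, for an 8-bit unsigned iv with step 12,
   num_of_ending_zeros_binary (12) == 2, so the period is
   (256 >> 2) - 1 == 63: the iv runs through 64 distinct values before
   wrapping, hence an exit test based on it can distinguish up to 63
   iterations.  */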
4974 pow2div = num_ending_zeros (step);
4976 period = build_low_bits_mask (type,
4977 (TYPE_PRECISION (type)
4978 - tree_to_uhwi (pow2div)));
4980 return period;
4983 /* Returns the comparison operator used when eliminating the iv USE. */
4985 static enum tree_code
4986 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4988 struct loop *loop = data->current_loop;
4989 basic_block ex_bb;
4990 edge exit;
4992 ex_bb = gimple_bb (use->stmt);
4993 exit = EDGE_SUCC (ex_bb, 0);
4994 if (flow_bb_inside_loop_p (loop, exit->dest))
4995 exit = EDGE_SUCC (ex_bb, 1);
4997 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5000 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5001 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5002 calculation is performed in a non-wrapping type.
5004 TODO: More generally, we could test for the situation that
5005 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5006 This would require knowing the sign of OFFSET. */
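/* For example, if BASE is an SSA name defined by base = start + off and
   OFFSET is that same off, then BASE - OFFSET == start, which cannot
   overflow because base itself was computed in a non-wrapping type.  */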
5008 static bool
5009 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5011 enum tree_code code;
5012 tree e1, e2;
5013 aff_tree aff_e1, aff_e2, aff_offset;
5015 if (!nowrap_type_p (TREE_TYPE (base)))
5016 return false;
5018 base = expand_simple_operations (base);
5020 if (TREE_CODE (base) == SSA_NAME)
5022 gimple *stmt = SSA_NAME_DEF_STMT (base);
5024 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5025 return false;
5027 code = gimple_assign_rhs_code (stmt);
5028 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5029 return false;
5031 e1 = gimple_assign_rhs1 (stmt);
5032 e2 = gimple_assign_rhs2 (stmt);
5034 else
5036 code = TREE_CODE (base);
5037 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5038 return false;
5039 e1 = TREE_OPERAND (base, 0);
5040 e2 = TREE_OPERAND (base, 1);
5043 /* Use affine expansion as deeper inspection to prove the equality. */
5044 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5045 &aff_e2, &data->name_expansion_cache);
5046 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5047 &aff_offset, &data->name_expansion_cache);
5048 aff_combination_scale (&aff_offset, -1);
5049 switch (code)
5051 case PLUS_EXPR:
5052 aff_combination_add (&aff_e2, &aff_offset);
5053 if (aff_combination_zero_p (&aff_e2))
5054 return true;
5056 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5057 &aff_e1, &data->name_expansion_cache);
5058 aff_combination_add (&aff_e1, &aff_offset);
5059 return aff_combination_zero_p (&aff_e1);
5061 case POINTER_PLUS_EXPR:
5062 aff_combination_add (&aff_e2, &aff_offset);
5063 return aff_combination_zero_p (&aff_e2);
5065 default:
5066 return false;
5070 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5071 comparison with CAND.  NITER describes the number of iterations of
5072 the loop.  If successful, the comparison in COMP_P is altered accordingly.
5074 We aim to handle the following situation:
5076 sometype *base, *p;
5077 int a, b, i;
5079 i = a;
5080 p = p_0 = base + a;
5082 do
5083 {
5084 bla (*p);
5085 p++;
5086 i++;
5087 }
5088 while (i < b);
5090 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5091 We aim to optimize this to
5093 p = p_0 = base + a;
5094 do
5095 {
5096 bla (*p);
5097 p++;
5098 }
5099 while (p < p_0 - a + b);
5101 This preserves correctness, since the pointer arithmetic does not
5102 overflow. More precisely:
5104 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5105 overflow in computing it or the values of p.
5106 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5107 overflow. To prove this, we use the fact that p_0 = base + a. */
5109 static bool
5110 iv_elimination_compare_lt (struct ivopts_data *data,
5111 struct iv_cand *cand, enum tree_code *comp_p,
5112 struct tree_niter_desc *niter)
5114 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5115 struct aff_tree nit, tmpa, tmpb;
5116 enum tree_code comp;
5117 HOST_WIDE_INT step;
5119 /* We need to know that the candidate induction variable does not overflow.
5120 While more complex analysis may be used to prove this, for now just
5121 check that the variable appears in the original program and that it
5122 is computed in a type that guarantees no overflows. */
5123 cand_type = TREE_TYPE (cand->iv->base);
5124 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5125 return false;
5127 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5128 the calculation of the BOUND could overflow, making the comparison
5129 invalid. */
5130 if (!data->loop_single_exit_p)
5131 return false;
5133 /* We need to be able to decide whether the candidate is increasing or decreasing
5134 in order to choose the right comparison operator. */
5135 if (!cst_and_fits_in_hwi (cand->iv->step))
5136 return false;
5137 step = int_cst_value (cand->iv->step);
5139 /* Check that the number of iterations matches the expected pattern:
5140 a + 1 > b ? 0 : b - a - 1. */
5141 mbz = niter->may_be_zero;
5142 if (TREE_CODE (mbz) == GT_EXPR)
5144 /* Handle a + 1 > b. */
5145 tree op0 = TREE_OPERAND (mbz, 0);
5146 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5148 a = TREE_OPERAND (op0, 0);
5149 b = TREE_OPERAND (mbz, 1);
5151 else
5152 return false;
5154 else if (TREE_CODE (mbz) == LT_EXPR)
5156 tree op1 = TREE_OPERAND (mbz, 1);
5158 /* Handle b < a + 1. */
5159 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5161 a = TREE_OPERAND (op1, 0);
5162 b = TREE_OPERAND (mbz, 0);
5164 else
5165 return false;
5167 else
5168 return false;
5170 /* Expected number of iterations is B - A - 1. Check that it matches
5171 the actual number, i.e., that B - A - NITER = 1. */
5172 tree_to_aff_combination (niter->niter, nit_type, &nit);
5173 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5174 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5175 aff_combination_scale (&nit, -1);
5176 aff_combination_scale (&tmpa, -1);
5177 aff_combination_add (&tmpb, &tmpa);
5178 aff_combination_add (&tmpb, &nit);
5179 if (tmpb.n != 0 || tmpb.offset != 1)
5180 return false;
5182 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5183 overflow. */
5184 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5185 cand->iv->step,
5186 fold_convert (TREE_TYPE (cand->iv->step), a));
5187 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5188 return false;
5190 /* Determine the new comparison operator. */
5191 comp = step < 0 ? GT_EXPR : LT_EXPR;
5192 if (*comp_p == NE_EXPR)
5193 *comp_p = comp;
5194 else if (*comp_p == EQ_EXPR)
5195 *comp_p = invert_tree_comparison (comp, false);
5196 else
5197 gcc_unreachable ();
5199 return true;
5202 /* Check whether it is possible to express the condition in USE by comparison
5203 of candidate CAND. If so, store the value compared with to BOUND, and the
5204 comparison operator to COMP. */
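/* A hypothetical example: an exit test i != 100 on an iv with step 1
   may be replaced by a test against a pointer candidate p with step 4;
   BOUND becomes the value of p after the loop's iterations (computed by
   cand_value_at) and COMP the operator from iv_elimination_compare,
   possibly turned into < by iv_elimination_compare_lt below.  */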
5206 static bool
5207 may_eliminate_iv (struct ivopts_data *data,
5208 struct iv_use *use, struct iv_cand *cand, tree *bound,
5209 enum tree_code *comp)
5211 basic_block ex_bb;
5212 edge exit;
5213 tree period;
5214 struct loop *loop = data->current_loop;
5215 aff_tree bnd;
5216 struct tree_niter_desc *desc = NULL;
5218 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5219 return false;
5221 /* For now this works only for exits that dominate the loop latch.
5222 TODO: extend to other conditions inside loop body. */
5223 ex_bb = gimple_bb (use->stmt);
5224 if (use->stmt != last_stmt (ex_bb)
5225 || gimple_code (use->stmt) != GIMPLE_COND
5226 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5227 return false;
5229 exit = EDGE_SUCC (ex_bb, 0);
5230 if (flow_bb_inside_loop_p (loop, exit->dest))
5231 exit = EDGE_SUCC (ex_bb, 1);
5232 if (flow_bb_inside_loop_p (loop, exit->dest))
5233 return false;
5235 desc = niter_for_exit (data, exit);
5236 if (!desc)
5237 return false;
5239 /* Determine whether we can use the variable to test the exit condition.
5240 This is the case iff the period of the induction variable is greater
5241 than the number of iterations for which the exit condition is true. */
5242 period = iv_period (cand->iv);
5244 /* If the number of iterations is constant, compare against it directly. */
5245 if (TREE_CODE (desc->niter) == INTEGER_CST)
5247 /* See cand_value_at. */
5248 if (stmt_after_increment (loop, cand, use->stmt))
5250 if (!tree_int_cst_lt (desc->niter, period))
5251 return false;
5253 else
5255 if (tree_int_cst_lt (period, desc->niter))
5256 return false;
5260 /* If not, and if this is the only possible exit of the loop, see whether
5261 we can get a conservative estimate of the number of iterations of the
5262 entire loop and compare against that instead. */
5263 else
5265 widest_int period_value, max_niter;
5267 max_niter = desc->max;
5268 if (stmt_after_increment (loop, cand, use->stmt))
5269 max_niter += 1;
5270 period_value = wi::to_widest (period);
5271 if (wi::gtu_p (max_niter, period_value))
5273 /* See if we can take advantage of inferred loop bound information. */
5274 if (data->loop_single_exit_p)
5276 if (!max_loop_iterations (loop, &max_niter))
5277 return false;
5278 /* The loop bound is already adjusted by adding 1. */
5279 if (wi::gtu_p (max_niter, period_value))
5280 return false;
5282 else
5283 return false;
5287 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5289 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5290 aff_combination_to_tree (&bnd));
5291 *comp = iv_elimination_compare (data, use);
5293 /* It is unlikely that computing the number of iterations using division
5294 would be more profitable than keeping the original induction variable. */
5295 if (expression_expensive_p (*bound))
5296 return false;
5298 /* Sometimes, it is possible to handle the situation that the number of
5299 iterations may be zero unless additional assumptions hold, by using <
5300 instead of != in the exit condition.
5302 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5303 base the exit condition on it. However, that is often too
5304 expensive. */
5305 if (!integer_zerop (desc->may_be_zero))
5306 return iv_elimination_compare_lt (data, cand, comp, desc);
5308 return true;
5311 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5312 be copied if it is used in the loop body and DATA->body_includes_call.  */
5314 static int
5315 parm_decl_cost (struct ivopts_data *data, tree bound)
5317 tree sbound = bound;
5318 STRIP_NOPS (sbound);
5320 if (TREE_CODE (sbound) == SSA_NAME
5321 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5322 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5323 && data->body_includes_call)
5324 return COSTS_N_INSNS (1);
5326 return 0;
5329 /* Determines cost of basing replacement of USE on CAND in a condition. */
5331 static bool
5332 determine_use_iv_cost_condition (struct ivopts_data *data,
5333 struct iv_use *use, struct iv_cand *cand)
5335 tree bound = NULL_TREE;
5336 struct iv *cmp_iv;
5337 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5338 comp_cost elim_cost, express_cost, cost, bound_cost;
5339 bool ok;
5340 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5341 tree *control_var, *bound_cst;
5342 enum tree_code comp = ERROR_MARK;
5344 /* Only consider real candidates. */
5345 if (!cand->iv)
5347 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5348 ERROR_MARK, -1);
5349 return false;
5352 /* Try iv elimination. */
5353 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5355 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5356 if (elim_cost.cost == 0)
5357 elim_cost.cost = parm_decl_cost (data, bound);
5358 else if (TREE_CODE (bound) == INTEGER_CST)
5359 elim_cost.cost = 0;
5360 /* If we replace a loop condition 'i < n' with 'p < base + n',
5361 depends_on_elim will have 'base' and 'n' set, which implies
5362 that both 'base' and 'n' will be live during the loop. More likely,
5363 'base + n' will be loop invariant, resulting in only one live value
5364 during the loop. So in that case we clear depends_on_elim and set
5365 elim_inv_expr_id instead. */
5366 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5368 elim_inv_expr_id = get_expr_id (data, bound);
5369 bitmap_clear (depends_on_elim);
5371 /* The bound is a loop invariant, so it will be computed
5372 only once.  */
5373 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5375 else
5376 elim_cost = infinite_cost;
5378 /* Try expressing the original giv. If it is compared with an invariant,
5379 note that we cannot get rid of it. */
5380 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5381 NULL, &cmp_iv);
5382 gcc_assert (ok);
5384 /* When the condition is a comparison of the candidate IV against
5385 zero, prefer this IV.
5387 TODO: The constant that we're subtracting from the cost should
5388 be target-dependent. This information should be added to the
5389 target costs for each backend. */
5390 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5391 && integer_zerop (*bound_cst)
5392 && (operand_equal_p (*control_var, cand->var_after, 0)
5393 || operand_equal_p (*control_var, cand->var_before, 0)))
5394 elim_cost.cost -= 1;
5396 express_cost = get_computation_cost (data, use, cand, false,
5397 &depends_on_express, NULL,
5398 &express_inv_expr_id);
5399 fd_ivopts_data = data;
5400 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5402 /* Count the cost of the original bound as well. */
5403 bound_cost = force_var_cost (data, *bound_cst, NULL);
5404 if (bound_cost.cost == 0)
5405 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5406 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5407 bound_cost.cost = 0;
5408 express_cost.cost += bound_cost.cost;
5410 /* Choose the better approach, preferring the eliminated IV. */
5411 if (compare_costs (elim_cost, express_cost) <= 0)
5413 cost = elim_cost;
5414 depends_on = depends_on_elim;
5415 depends_on_elim = NULL;
5416 inv_expr_id = elim_inv_expr_id;
5418 else
5420 cost = express_cost;
5421 depends_on = depends_on_express;
5422 depends_on_express = NULL;
5423 bound = NULL_TREE;
5424 comp = ERROR_MARK;
5425 inv_expr_id = express_inv_expr_id;
5428 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5430 if (depends_on_elim)
5431 BITMAP_FREE (depends_on_elim);
5432 if (depends_on_express)
5433 BITMAP_FREE (depends_on_express);
5435 return !infinite_cost_p (cost);
5438 /* Determines cost of basing replacement of USE on CAND. Returns false
5439 if USE cannot be based on CAND. */
5441 static bool
5442 determine_use_iv_cost (struct ivopts_data *data,
5443 struct iv_use *use, struct iv_cand *cand)
5445 switch (use->type)
5447 case USE_NONLINEAR_EXPR:
5448 return determine_use_iv_cost_generic (data, use, cand);
5450 case USE_ADDRESS:
5451 return determine_use_iv_cost_address (data, use, cand);
5453 case USE_COMPARE:
5454 return determine_use_iv_cost_condition (data, use, cand);
5456 default:
5457 gcc_unreachable ();
5461 /* Return true if get_computation_cost indicates that autoincrement is
5462 a possibility for the pair of USE and CAND, false otherwise. */
5464 static bool
5465 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5466 struct iv_cand *cand)
5468 bitmap depends_on;
5469 bool can_autoinc;
5470 comp_cost cost;
5472 if (use->type != USE_ADDRESS)
5473 return false;
5475 cost = get_computation_cost (data, use, cand, true, &depends_on,
5476 &can_autoinc, NULL);
5478 BITMAP_FREE (depends_on);
5480 return !infinite_cost_p (cost) && can_autoinc;
5483 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5484 use that allows autoincrement, and set their AINC_USE if possible. */
5486 static void
5487 set_autoinc_for_original_candidates (struct ivopts_data *data)
5489 unsigned i, j;
5491 for (i = 0; i < n_iv_cands (data); i++)
5493 struct iv_cand *cand = iv_cand (data, i);
5494 struct iv_use *closest_before = NULL;
5495 struct iv_use *closest_after = NULL;
5496 if (cand->pos != IP_ORIGINAL)
5497 continue;
5499 for (j = 0; j < n_iv_uses (data); j++)
5501 struct iv_use *use = iv_use (data, j);
5502 unsigned uid = gimple_uid (use->stmt);
5504 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5505 continue;
5507 if (uid < gimple_uid (cand->incremented_at)
5508 && (closest_before == NULL
5509 || uid > gimple_uid (closest_before->stmt)))
5510 closest_before = use;
5512 if (uid > gimple_uid (cand->incremented_at)
5513 && (closest_after == NULL
5514 || uid < gimple_uid (closest_after->stmt)))
5515 closest_after = use;
5518 if (closest_before != NULL
5519 && autoinc_possible_for_pair (data, closest_before, cand))
5520 cand->ainc_use = closest_before;
5521 else if (closest_after != NULL
5522 && autoinc_possible_for_pair (data, closest_after, cand))
5523 cand->ainc_use = closest_after;
5527 /* Finds the candidates for the induction variables. */
5529 static void
5530 find_iv_candidates (struct ivopts_data *data)
5532 /* Add commonly used ivs. */
5533 add_standard_iv_candidates (data);
5535 /* Add old induction variables. */
5536 add_iv_candidate_for_bivs (data);
5538 /* Add induction variables derived from uses. */
5539 add_iv_candidate_for_uses (data);
5541 set_autoinc_for_original_candidates (data);
5543 /* Record the important candidates. */
5544 record_important_candidates (data);
5547 /* Determines costs of basing the use of the iv on an iv candidate. */
5549 static void
5550 determine_use_iv_costs (struct ivopts_data *data)
5552 unsigned i, j;
5553 struct iv_use *use;
5554 struct iv_cand *cand;
5555 bitmap to_clear = BITMAP_ALLOC (NULL);
5557 alloc_use_cost_map (data);
5559 for (i = 0; i < n_iv_uses (data); i++)
5561 use = iv_use (data, i);
5563 if (data->consider_all_candidates)
5565 for (j = 0; j < n_iv_cands (data); j++)
5567 cand = iv_cand (data, j);
5568 determine_use_iv_cost (data, use, cand);
5571 else
5573 bitmap_iterator bi;
5575 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5577 cand = iv_cand (data, j);
5578 if (!determine_use_iv_cost (data, use, cand))
5579 bitmap_set_bit (to_clear, j);
5582 /* Remove the candidates for which the cost is infinite from
5583 the list of related candidates. */
5584 bitmap_and_compl_into (use->related_cands, to_clear);
5585 bitmap_clear (to_clear);
5589 BITMAP_FREE (to_clear);
5591 if (dump_file && (dump_flags & TDF_DETAILS))
5593 fprintf (dump_file, "Use-candidate costs:\n");
5595 for (i = 0; i < n_iv_uses (data); i++)
5597 use = iv_use (data, i);
5599 fprintf (dump_file, "Use %d:\n", i);
5600 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5601 for (j = 0; j < use->n_map_members; j++)
5603 if (!use->cost_map[j].cand
5604 || infinite_cost_p (use->cost_map[j].cost))
5605 continue;
5607 fprintf (dump_file, " %d\t%d\t%d\t",
5608 use->cost_map[j].cand->id,
5609 use->cost_map[j].cost.cost,
5610 use->cost_map[j].cost.complexity);
5611 if (use->cost_map[j].depends_on)
5612 bitmap_print (dump_file,
5613 use->cost_map[j].depends_on, "","");
5614 if (use->cost_map[j].inv_expr_id != -1)
5615 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5616 fprintf (dump_file, "\n");
5619 fprintf (dump_file, "\n");
5621 fprintf (dump_file, "\n");
5625 /* Determines cost of the candidate CAND. */
5627 static void
5628 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5630 comp_cost cost_base;
5631 unsigned cost, cost_step;
5632 tree base;
5634 if (!cand->iv)
5636 cand->cost = 0;
5637 return;
5640 /* There are two costs associated with the candidate -- its increment
5641 and its initialization. The second is almost negligible for any loop
5642 that rolls enough, so we give it only very little weight.  */
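/* E.g., with a hypothetical setup cost of 3 and a loop averaging 10
   iterations, adjust_setup_cost (when optimizing for speed) charges
   only about a tenth of it, so the per-iteration increment cost
   dominates the candidate's total.  */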
5644 base = cand->iv->base;
5645 cost_base = force_var_cost (data, base, NULL);
5646 /* It will be exceptional that the iv register happens to be initialized with
5647 the proper value at no cost. In general, there will at least be a regcopy
5648 or a const set. */
5649 if (cost_base.cost == 0)
5650 cost_base.cost = COSTS_N_INSNS (1);
5651 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5653 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5655 /* Prefer the original ivs unless we may gain something by replacing them.
5656 The reason is to make debugging simpler, so this is not relevant for
5657 artificial ivs created by other optimization passes. */
5658 if (cand->pos != IP_ORIGINAL
5659 || !SSA_NAME_VAR (cand->var_before)
5660 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5661 cost++;
5663 /* Prefer not to insert statements into the latch unless there are some
5664 already (so that we do not create unnecessary jumps). */
5665 if (cand->pos == IP_END
5666 && empty_block_p (ip_end_pos (data->current_loop)))
5667 cost++;
5669 cand->cost = cost;
5670 cand->cost_step = cost_step;
5673 /* Determines costs of computation of the candidates. */
5675 static void
5676 determine_iv_costs (struct ivopts_data *data)
5678 unsigned i;
5680 if (dump_file && (dump_flags & TDF_DETAILS))
5682 fprintf (dump_file, "Candidate costs:\n");
5683 fprintf (dump_file, " cand\tcost\n");
5686 for (i = 0; i < n_iv_cands (data); i++)
5688 struct iv_cand *cand = iv_cand (data, i);
5690 determine_iv_cost (data, cand);
5692 if (dump_file && (dump_flags & TDF_DETAILS))
5693 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5696 if (dump_file && (dump_flags & TDF_DETAILS))
5697 fprintf (dump_file, "\n");
5700 /* Calculates cost for having SIZE induction variables. */
5702 static unsigned
5703 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5705 /* We add size to the cost, so that we prefer eliminating ivs
5706 if possible. */
5707 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5708 data->body_includes_call);
5711 /* For each size of the induction variable set determine the penalty. */
5713 static void
5714 determine_set_costs (struct ivopts_data *data)
5716 unsigned j, n;
5717 gphi *phi;
5718 gphi_iterator psi;
5719 tree op;
5720 struct loop *loop = data->current_loop;
5721 bitmap_iterator bi;
5723 if (dump_file && (dump_flags & TDF_DETAILS))
5725 fprintf (dump_file, "Global costs:\n");
5726 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5727 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5728 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5729 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5732 n = 0;
5733 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5735 phi = psi.phi ();
5736 op = PHI_RESULT (phi);
5738 if (virtual_operand_p (op))
5739 continue;
5741 if (get_iv (data, op))
5742 continue;
5744 n++;
5747 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5749 struct version_info *info = ver_info (data, j);
5751 if (info->inv_id && info->has_nonlin_use)
5752 n++;
5755 data->regs_used = n;
5756 if (dump_file && (dump_flags & TDF_DETAILS))
5757 fprintf (dump_file, " regs_used %d\n", n);
5759 if (dump_file && (dump_flags & TDF_DETAILS))
5761 fprintf (dump_file, " cost for size:\n");
5762 fprintf (dump_file, " ivs\tcost\n");
5763 for (j = 0; j <= 2 * target_avail_regs; j++)
5764 fprintf (dump_file, " %d\t%d\n", j,
5765 ivopts_global_cost_for_size (data, j));
5766 fprintf (dump_file, "\n");
5770 /* Returns true if A is a cheaper cost pair than B. */
5772 static bool
5773 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5775 int cmp;
5777 if (!a)
5778 return false;
5780 if (!b)
5781 return true;
5783 cmp = compare_costs (a->cost, b->cost);
5784 if (cmp < 0)
5785 return true;
5787 if (cmp > 0)
5788 return false;
5790 /* In case the costs are the same, prefer the cheaper candidate. */
5791 if (a->cand->cost < b->cand->cost)
5792 return true;
5794 return false;
5798 /* Returns the candidate by which USE is expressed in IVS.  */
5800 static struct cost_pair *
5801 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5803 return ivs->cand_for_use[use->id];
5806 /* Computes the cost field of IVS structure. */
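/* The total is cand_use_cost (the sum of the chosen cost pairs over all
   uses) plus cand_cost (the selected candidates' own costs) plus the
   global register-pressure cost for n_regs + num_used_inv_expr
   registers; e.g., with hypothetical parts 10, 4 and 6 the recorded
   cost is 20.  */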
5808 static void
5809 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5811 comp_cost cost = ivs->cand_use_cost;
5813 cost.cost += ivs->cand_cost;
5815 cost.cost += ivopts_global_cost_for_size (data,
5816 ivs->n_regs + ivs->num_used_inv_expr);
5818 ivs->cost = cost;
5821 /* Remove invariants in set INVS from set IVS.  */
5823 static void
5824 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5826 bitmap_iterator bi;
5827 unsigned iid;
5829 if (!invs)
5830 return;
5832 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5834 ivs->n_invariant_uses[iid]--;
5835 if (ivs->n_invariant_uses[iid] == 0)
5836 ivs->n_regs--;
5840 /* Set USE not to be expressed by any candidate in IVS. */
5842 static void
5843 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5844 struct iv_use *use)
5846 unsigned uid = use->id, cid;
5847 struct cost_pair *cp;
5849 cp = ivs->cand_for_use[uid];
5850 if (!cp)
5851 return;
5852 cid = cp->cand->id;
5854 ivs->bad_uses++;
5855 ivs->cand_for_use[uid] = NULL;
5856 ivs->n_cand_uses[cid]--;
5858 if (ivs->n_cand_uses[cid] == 0)
5860 bitmap_clear_bit (ivs->cands, cid);
5861 /* Do not count the pseudocandidates. */
5862 if (cp->cand->iv)
5863 ivs->n_regs--;
5864 ivs->n_cands--;
5865 ivs->cand_cost -= cp->cand->cost;
5867 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5870 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5872 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5874 if (cp->inv_expr_id != -1)
5876 ivs->used_inv_expr[cp->inv_expr_id]--;
5877 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5878 ivs->num_used_inv_expr--;
5880 iv_ca_recount_cost (data, ivs);
5883 /* Add invariants in set INVS to set IVS. */
5885 static void
5886 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5888 bitmap_iterator bi;
5889 unsigned iid;
5891 if (!invs)
5892 return;
5894 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5896 ivs->n_invariant_uses[iid]++;
5897 if (ivs->n_invariant_uses[iid] == 1)
5898 ivs->n_regs++;
5902 /* Set cost pair for USE in set IVS to CP. */
5904 static void
5905 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5906 struct iv_use *use, struct cost_pair *cp)
5908 unsigned uid = use->id, cid;
5910 if (ivs->cand_for_use[uid] == cp)
5911 return;
5913 if (ivs->cand_for_use[uid])
5914 iv_ca_set_no_cp (data, ivs, use);
5916 if (cp)
5918 cid = cp->cand->id;
5920 ivs->bad_uses--;
5921 ivs->cand_for_use[uid] = cp;
5922 ivs->n_cand_uses[cid]++;
5923 if (ivs->n_cand_uses[cid] == 1)
5925 bitmap_set_bit (ivs->cands, cid);
5926 /* Do not count the pseudocandidates. */
5927 if (cp->cand->iv)
5928 ivs->n_regs++;
5929 ivs->n_cands++;
5930 ivs->cand_cost += cp->cand->cost;
5932 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5935 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5936 iv_ca_set_add_invariants (ivs, cp->depends_on);
5938 if (cp->inv_expr_id != -1)
5940 ivs->used_inv_expr[cp->inv_expr_id]++;
5941 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5942 ivs->num_used_inv_expr++;
5944 iv_ca_recount_cost (data, ivs);
5948 /* Extend set IVS by expressing USE by some of the candidates in it
5949 if possible. Consider all important candidates if candidates in
5950 set IVS don't give any result. */
5952 static void
5953 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5954 struct iv_use *use)
5956 struct cost_pair *best_cp = NULL, *cp;
5957 bitmap_iterator bi;
5958 unsigned i;
5959 struct iv_cand *cand;
5961 gcc_assert (ivs->upto >= use->id);
5962 ivs->upto++;
5963 ivs->bad_uses++;
5965 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5967 cand = iv_cand (data, i);
5968 cp = get_use_iv_cost (data, use, cand);
5969 if (cheaper_cost_pair (cp, best_cp))
5970 best_cp = cp;
5973 if (best_cp == NULL)
5975 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5977 cand = iv_cand (data, i);
5978 cp = get_use_iv_cost (data, use, cand);
5979 if (cheaper_cost_pair (cp, best_cp))
5980 best_cp = cp;
5984 iv_ca_set_cp (data, ivs, use, best_cp);
5987 /* Get cost for assignment IVS. */
5989 static comp_cost
5990 iv_ca_cost (struct iv_ca *ivs)
5992 /* This was a conditional expression but it triggered a bug in
5993 Sun C 5.5. */
5994 if (ivs->bad_uses)
5995 return infinite_cost;
5996 else
5997 return ivs->cost;
6000 /* Returns true if all dependences of CP are among invariants in IVS. */
6002 static bool
6003 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6005 unsigned i;
6006 bitmap_iterator bi;
6008 if (!cp->depends_on)
6009 return true;
6011 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6013 if (ivs->n_invariant_uses[i] == 0)
6014 return false;
6017 return true;
6020 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6021 it before NEXT_CHANGE. */
6023 static struct iv_ca_delta *
6024 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6025 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6027 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6029 change->use = use;
6030 change->old_cp = old_cp;
6031 change->new_cp = new_cp;
6032 change->next_change = next_change;
6034 return change;
6037 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6038 are rewritten. */
6040 static struct iv_ca_delta *
6041 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6043 struct iv_ca_delta *last;
6045 if (!l2)
6046 return l1;
6048 if (!l1)
6049 return l2;
6051 for (last = l1; last->next_change; last = last->next_change)
6052 continue;
6053 last->next_change = l2;
6055 return l1;
6058 /* Reverse the list of changes DELTA, forming the inverse to it. */
6060 static struct iv_ca_delta *
6061 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6063 struct iv_ca_delta *act, *next, *prev = NULL;
6065 for (act = delta; act; act = next)
6067 next = act->next_change;
6068 act->next_change = prev;
6069 prev = act;
6071 std::swap (act->old_cp, act->new_cp);
6074 return prev;
6077 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6078 reverted instead. */
6080 static void
6081 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6082 struct iv_ca_delta *delta, bool forward)
6084 struct cost_pair *from, *to;
6085 struct iv_ca_delta *act;
6087 if (!forward)
6088 delta = iv_ca_delta_reverse (delta);
6090 for (act = delta; act; act = act->next_change)
6092 from = act->old_cp;
6093 to = act->new_cp;
6094 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6095 iv_ca_set_cp (data, ivs, act->use, to);
6098 if (!forward)
6099 iv_ca_delta_reverse (delta);
6102 /* Returns true if CAND is used in IVS. */
6104 static bool
6105 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6107 return ivs->n_cand_uses[cand->id] > 0;
6110 /* Returns number of induction variable candidates in the set IVS. */
6112 static unsigned
6113 iv_ca_n_cands (struct iv_ca *ivs)
6115 return ivs->n_cands;
6118 /* Free the list of changes DELTA. */
6120 static void
6121 iv_ca_delta_free (struct iv_ca_delta **delta)
6123 struct iv_ca_delta *act, *next;
6125 for (act = *delta; act; act = next)
6127 next = act->next_change;
6128 free (act);
6131 *delta = NULL;
6134 /* Allocates a new iv candidate assignment.  */
6136 static struct iv_ca *
6137 iv_ca_new (struct ivopts_data *data)
6139 struct iv_ca *nw = XNEW (struct iv_ca);
6141 nw->upto = 0;
6142 nw->bad_uses = 0;
6143 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6144 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6145 nw->cands = BITMAP_ALLOC (NULL);
6146 nw->n_cands = 0;
6147 nw->n_regs = 0;
6148 nw->cand_use_cost = no_cost;
6149 nw->cand_cost = 0;
6150 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6151 nw->cost = no_cost;
6152 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6153 nw->num_used_inv_expr = 0;
6155 return nw;
6158 /* Free memory occupied by the set IVS. */
6160 static void
6161 iv_ca_free (struct iv_ca **ivs)
6163 free ((*ivs)->cand_for_use);
6164 free ((*ivs)->n_cand_uses);
6165 BITMAP_FREE ((*ivs)->cands);
6166 free ((*ivs)->n_invariant_uses);
6167 free ((*ivs)->used_inv_expr);
6168 free (*ivs);
6169 *ivs = NULL;
6172 /* Dumps IVS to FILE. */
6174 static void
6175 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6177 const char *pref = " invariants ";
6178 unsigned i;
6179 comp_cost cost = iv_ca_cost (ivs);
6181 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6182 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6183 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6184 bitmap_print (file, ivs->cands, " candidates: ","\n");
6186 for (i = 0; i < ivs->upto; i++)
6188 struct iv_use *use = iv_use (data, i);
6189 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6190 if (cp)
6191 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6192 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6193 else
6194 fprintf (file, " use:%d --> ??\n", use->id);
6197 for (i = 1; i <= data->max_inv_id; i++)
6198 if (ivs->n_invariant_uses[i])
6200 fprintf (file, "%s%d", pref, i);
6201 pref = ", ";
6203 fprintf (file, "\n\n");
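/* Sample of the dump produced above (made-up numbers; candidate lists
   are printed by bitmap_print):

     cost: 17 (complexity 2)
     cand_cost: 8
     cand_use_cost: 9 (complexity 2)
     candidates: 1, 3
     use:0 --> iv_cand:1, cost=(4,1)
     use:1 --> iv_cand:3, cost=(5,1)
     invariants 2, 5  */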
6206 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6207 the new set, and store the differences in DELTA. The number of induction
6208 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6209 true, the function tries to find a solution with a minimal number of iv candidates. */
6211 static comp_cost
6212 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6213 struct iv_cand *cand, struct iv_ca_delta **delta,
6214 unsigned *n_ivs, bool min_ncand)
6216 unsigned i;
6217 comp_cost cost;
6218 struct iv_use *use;
6219 struct cost_pair *old_cp, *new_cp;
6221 *delta = NULL;
6222 for (i = 0; i < ivs->upto; i++)
6224 use = iv_use (data, i);
6225 old_cp = iv_ca_cand_for_use (ivs, use);
6227 if (old_cp
6228 && old_cp->cand == cand)
6229 continue;
6231 new_cp = get_use_iv_cost (data, use, cand);
6232 if (!new_cp)
6233 continue;
6235 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6236 continue;
6238 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6239 continue;
6241 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6244 iv_ca_delta_commit (data, ivs, *delta, true);
6245 cost = iv_ca_cost (ivs);
6246 if (n_ivs)
6247 *n_ivs = iv_ca_n_cands (ivs);
6248 iv_ca_delta_commit (data, ivs, *delta, false);
6250 return cost;
6253 /* Try narrowing set IVS by removing CAND. Return the cost of
6254 the new set and store the differences in DELTA. START is
6255 the candidate with which we start narrowing. */
6257 static comp_cost
6258 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6259 struct iv_cand *cand, struct iv_cand *start,
6260 struct iv_ca_delta **delta)
6262 unsigned i, ci;
6263 struct iv_use *use;
6264 struct cost_pair *old_cp, *new_cp, *cp;
6265 bitmap_iterator bi;
6266 struct iv_cand *cnd;
6267 comp_cost cost, best_cost, acost;
6269 *delta = NULL;
6270 for (i = 0; i < n_iv_uses (data); i++)
6272 use = iv_use (data, i);
6274 old_cp = iv_ca_cand_for_use (ivs, use);
6275 if (old_cp->cand != cand)
6276 continue;
6278 best_cost = iv_ca_cost (ivs);
6279 /* Start narrowing with START. */
6280 new_cp = get_use_iv_cost (data, use, start);
6282 if (data->consider_all_candidates)
6284 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6286 if (ci == cand->id || (start && ci == start->id))
6287 continue;
6289 cnd = iv_cand (data, ci);
6291 cp = get_use_iv_cost (data, use, cnd);
6292 if (!cp)
6293 continue;
6295 iv_ca_set_cp (data, ivs, use, cp);
6296 acost = iv_ca_cost (ivs);
6298 if (compare_costs (acost, best_cost) < 0)
6300 best_cost = acost;
6301 new_cp = cp;
6305 else
6307 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6309 if (ci == cand->id || (start && ci == start->id))
6310 continue;
6312 cnd = iv_cand (data, ci);
6314 cp = get_use_iv_cost (data, use, cnd);
6315 if (!cp)
6316 continue;
6318 iv_ca_set_cp (data, ivs, use, cp);
6319 acost = iv_ca_cost (ivs);
6321 if (compare_costs (acost, best_cost) < 0)
6323 best_cost = acost;
6324 new_cp = cp;
6328 /* Restore the old cp for this use. */
6329 iv_ca_set_cp (data, ivs, use, old_cp);
6331 if (!new_cp)
6333 iv_ca_delta_free (delta);
6334 return infinite_cost;
6337 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6340 iv_ca_delta_commit (data, ivs, *delta, true);
6341 cost = iv_ca_cost (ivs);
6342 iv_ca_delta_commit (data, ivs, *delta, false);
6344 return cost;
6347 /* Try optimizing the set of candidates IVS by removing candidates other
6348 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6349 differences in DELTA. */
6351 static comp_cost
6352 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6353 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6355 bitmap_iterator bi;
6356 struct iv_ca_delta *act_delta, *best_delta;
6357 unsigned i;
6358 comp_cost best_cost, acost;
6359 struct iv_cand *cand;
6361 best_delta = NULL;
6362 best_cost = iv_ca_cost (ivs);
6364 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6366 cand = iv_cand (data, i);
6368 if (cand == except_cand)
6369 continue;
6371 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6373 if (compare_costs (acost, best_cost) < 0)
6375 best_cost = acost;
6376 iv_ca_delta_free (&best_delta);
6377 best_delta = act_delta;
6379 else
6380 iv_ca_delta_free (&act_delta);
6383 if (!best_delta)
6385 *delta = NULL;
6386 return best_cost;
6389 /* Recurse to possibly remove other unnecessary ivs. */
6390 iv_ca_delta_commit (data, ivs, best_delta, true);
6391 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6392 iv_ca_delta_commit (data, ivs, best_delta, false);
6393 *delta = iv_ca_delta_join (best_delta, *delta);
6394 return best_cost;
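/* Illustrative walk-through (hypothetical candidates): starting from
   IVS = {c1, c2, c3}, the loop above may find that narrowing away c2
   (re-homing its uses onto c1 or c3) gives the best cost; the recursive
   call then repeats the search on {c1, c3}.  One call of iv_ca_prune
   can therefore remove several unnecessary candidates, with DELTA
   accumulating all of the changes.  */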
6397 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6398 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6399 the corresponding cost_pair; otherwise just return BEST_CP. */
6401 static struct cost_pair*
6402 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6403 unsigned int cand_idx, struct iv_cand *old_cand,
6404 struct cost_pair *best_cp)
6406 struct iv_cand *cand;
6407 struct cost_pair *cp;
6409 gcc_assert (old_cand != NULL && best_cp != NULL);
6410 if (cand_idx == old_cand->id)
6411 return best_cp;
6413 cand = iv_cand (data, cand_idx);
6414 cp = get_use_iv_cost (data, use, cand);
6415 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6416 return cp;
6418 return best_cp;
6421 /* Try breaking the locally optimal fixed-point for IVS by replacing candidates
6422 that are used by more than one iv use. For each of those candidates,
6423 this function tries to represent the iv uses under that candidate using
6424 other candidates with lower local cost, then tries to prune the new set.
6425 If the new set has lower cost, it returns the new cost after recording
6426 the candidate replacements in the list DELTA. */
6428 static comp_cost
6429 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6430 struct iv_ca_delta **delta)
6432 bitmap_iterator bi, bj;
6433 unsigned int i, j, k;
6434 struct iv_use *use;
6435 struct iv_cand *cand;
6436 comp_cost orig_cost, acost;
6437 struct iv_ca_delta *act_delta, *tmp_delta;
6438 struct cost_pair *old_cp, *best_cp = NULL;
6440 *delta = NULL;
6441 orig_cost = iv_ca_cost (ivs);
6443 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6445 if (ivs->n_cand_uses[i] == 1
6446 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6447 continue;
6449 cand = iv_cand (data, i);
6451 act_delta = NULL;
6452 /* Represent uses under the current candidate using other candidates
6453 with lower local cost. */
6454 for (j = 0; j < ivs->upto; j++)
6456 use = iv_use (data, j);
6457 old_cp = iv_ca_cand_for_use (ivs, use);
6459 if (old_cp->cand != cand)
6460 continue;
6462 best_cp = old_cp;
6463 if (data->consider_all_candidates)
6464 for (k = 0; k < n_iv_cands (data); k++)
6465 best_cp = cheaper_cost_with_cand (data, use, k,
6466 old_cp->cand, best_cp);
6467 else
6468 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6469 best_cp = cheaper_cost_with_cand (data, use, k,
6470 old_cp->cand, best_cp);
6472 if (best_cp == old_cp)
6473 continue;
6475 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6477 /* No need for further pruning. */
6478 if (!act_delta)
6479 continue;
6481 /* Prune the new candidate set. */
6482 iv_ca_delta_commit (data, ivs, act_delta, true);
6483 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6484 iv_ca_delta_commit (data, ivs, act_delta, false);
6485 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6487 if (compare_costs (acost, orig_cost) < 0)
6489 *delta = act_delta;
6490 return acost;
6492 else
6493 iv_ca_delta_free (&act_delta);
6496 return orig_cost;
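/* Illustrative example (hypothetical candidates): suppose candidate c is
   used by uses u1 and u2, while c1 is locally cheaper for u1 and c2 for
   u2.  The loop above builds a delta moving u1 to c1 and u2 to c2,
   prunes the resulting set (which may now drop c entirely) and accepts
   the combined delta only if the total cost decreases.  */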
6499 /* Tries to extend the set IVS in the best possible way in order
6500 to express the USE. If ORIGINALP is true, prefer candidates from
6501 the original set of IVs, otherwise favor important candidates not
6502 based on any memory object. */
6504 static bool
6505 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6506 struct iv_use *use, bool originalp)
6508 comp_cost best_cost, act_cost;
6509 unsigned i;
6510 bitmap_iterator bi;
6511 struct iv_cand *cand;
6512 struct iv_ca_delta *best_delta = NULL, *act_delta;
6513 struct cost_pair *cp;
6515 iv_ca_add_use (data, ivs, use);
6516 best_cost = iv_ca_cost (ivs);
6517 cp = iv_ca_cand_for_use (ivs, use);
6518 if (cp)
6520 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6521 iv_ca_set_no_cp (data, ivs, use);
6524 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6525 first try important candidates not based on any memory object. Only if
6526 this fails, try the specific ones. Rationale -- in loops with many
6527 variables the best choice often is to use just one generic biv. If we
6528 added here many ivs specific to the uses, the optimization algorithm later
6529 would be likely to get stuck in a local minimum, thus causing us to create
6530 too many ivs. The approach from few ivs to more seems more likely to be
6531 successful -- starting from few ivs, replacing an expensive use by a
6532 specific iv should always be a win. */
6533 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6535 cand = iv_cand (data, i);
6537 if (originalp && cand->pos != IP_ORIGINAL)
6538 continue;
6540 if (!originalp && cand->iv->base_object != NULL_TREE)
6541 continue;
6543 if (iv_ca_cand_used_p (ivs, cand))
6544 continue;
6546 cp = get_use_iv_cost (data, use, cand);
6547 if (!cp)
6548 continue;
6550 iv_ca_set_cp (data, ivs, use, cp);
6551 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6552 true);
6553 iv_ca_set_no_cp (data, ivs, use);
6554 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6556 if (compare_costs (act_cost, best_cost) < 0)
6558 best_cost = act_cost;
6560 iv_ca_delta_free (&best_delta);
6561 best_delta = act_delta;
6563 else
6564 iv_ca_delta_free (&act_delta);
6567 if (infinite_cost_p (best_cost))
6569 for (i = 0; i < use->n_map_members; i++)
6571 cp = use->cost_map + i;
6572 cand = cp->cand;
6573 if (!cand)
6574 continue;
6576 /* Already tried this. */
6577 if (cand->important)
6579 if (originalp && cand->pos == IP_ORIGINAL)
6580 continue;
6581 if (!originalp && cand->iv->base_object == NULL_TREE)
6582 continue;
6585 if (iv_ca_cand_used_p (ivs, cand))
6586 continue;
6588 act_delta = NULL;
6589 iv_ca_set_cp (data, ivs, use, cp);
6590 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6591 iv_ca_set_no_cp (data, ivs, use);
6592 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6593 cp, act_delta);
6595 if (compare_costs (act_cost, best_cost) < 0)
6597 best_cost = act_cost;
6599 if (best_delta)
6600 iv_ca_delta_free (&best_delta);
6601 best_delta = act_delta;
6603 else
6604 iv_ca_delta_free (&act_delta);
6608 iv_ca_delta_commit (data, ivs, best_delta, true);
6609 iv_ca_delta_free (&best_delta);
6611 return !infinite_cost_p (best_cost);
6614 /* Finds an initial assignment of candidates to uses. */
6616 static struct iv_ca *
6617 get_initial_solution (struct ivopts_data *data, bool originalp)
6619 struct iv_ca *ivs = iv_ca_new (data);
6620 unsigned i;
6622 for (i = 0; i < n_iv_uses (data); i++)
6623 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6625 iv_ca_free (&ivs);
6626 return NULL;
6629 return ivs;
6632 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6633 points to a bool variable; if it is true, this function also tries to
6634 break the locally optimal fixed-point by replacing candidates in IVS. */
6636 static bool
6637 try_improve_iv_set (struct ivopts_data *data,
6638 struct iv_ca *ivs, bool *try_replace_p)
6640 unsigned i, n_ivs;
6641 comp_cost acost, best_cost = iv_ca_cost (ivs);
6642 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6643 struct iv_cand *cand;
6645 /* Try extending the set of induction variables by one. */
6646 for (i = 0; i < n_iv_cands (data); i++)
6648 cand = iv_cand (data, i);
6650 if (iv_ca_cand_used_p (ivs, cand))
6651 continue;
6653 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6654 if (!act_delta)
6655 continue;
6657 /* If we successfully added the candidate and the set is small enough,
6658 try optimizing it by removing other candidates. */
6659 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6661 iv_ca_delta_commit (data, ivs, act_delta, true);
6662 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6663 iv_ca_delta_commit (data, ivs, act_delta, false);
6664 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6667 if (compare_costs (acost, best_cost) < 0)
6669 best_cost = acost;
6670 iv_ca_delta_free (&best_delta);
6671 best_delta = act_delta;
6673 else
6674 iv_ca_delta_free (&act_delta);
6677 if (!best_delta)
6679 /* Try removing the candidates from the set instead. */
6680 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6682 if (!best_delta && *try_replace_p)
6684 *try_replace_p = false;
6685 /* So far the candidate selection algorithm tends to choose fewer IVs,
6686 which handles cases in which a loop has many variables but the best
6687 choice is to use only one general biv. One weakness is that it can't
6688 handle the opposite case, in which different candidates should be
6689 chosen with respect to each use. To solve this problem, we replace
6690 candidates in the manner described in the comments of iv_ca_replace,
6691 thus giving the general algorithm a chance to break the locally
6692 optimal fixed-point in these cases. */
6693 best_cost = iv_ca_replace (data, ivs, &best_delta);
6696 if (!best_delta)
6697 return false;
6700 iv_ca_delta_commit (data, ivs, best_delta, true);
6701 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6702 iv_ca_delta_free (&best_delta);
6703 return true;
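/* One improvement step, schematically (a sketch of the code above):

     1. for each candidate C not in IVS, evaluate IVS + C (pruning the
        result when the set is small enough) and remember the cheapest
        delta;
     2. if no extension helped, try pruning IVS alone;
     3. if that fails too and *TRY_REPLACE_P is set, try iv_ca_replace
        once and clear the flag.

   The best delta found is committed and the function reports whether
   the cost of IVS improved.  */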
6706 /* Attempts to find the optimal set of induction variables. We use a simple
6707 greedy heuristic -- we try to replace at most one candidate in the selected
6708 solution and remove the unused ivs while this improves the cost. */
6710 static struct iv_ca *
6711 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6713 struct iv_ca *set;
6714 bool try_replace_p = true;
6716 /* Get the initial solution. */
6717 set = get_initial_solution (data, originalp);
6718 if (!set)
6720 if (dump_file && (dump_flags & TDF_DETAILS))
6721 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6722 return NULL;
6725 if (dump_file && (dump_flags & TDF_DETAILS))
6727 fprintf (dump_file, "Initial set of candidates:\n");
6728 iv_ca_dump (data, dump_file, set);
6731 while (try_improve_iv_set (data, set, &try_replace_p))
6733 if (dump_file && (dump_flags & TDF_DETAILS))
6735 fprintf (dump_file, "Improved to:\n");
6736 iv_ca_dump (data, dump_file, set);
6740 return set;
6743 static struct iv_ca *
6744 find_optimal_iv_set (struct ivopts_data *data)
6746 unsigned i;
6747 struct iv_ca *set, *origset;
6748 struct iv_use *use;
6749 comp_cost cost, origcost;
6751 /* Determine the cost based on a strategy that starts with the original IVs,
6752 and then try again using a strategy that prefers candidates not based
6753 on any memory object. */
6754 origset = find_optimal_iv_set_1 (data, true);
6755 set = find_optimal_iv_set_1 (data, false);
6757 if (!origset && !set)
6758 return NULL;
6760 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6761 cost = set ? iv_ca_cost (set) : infinite_cost;
6763 if (dump_file && (dump_flags & TDF_DETAILS))
6765 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6766 origcost.cost, origcost.complexity);
6767 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6768 cost.cost, cost.complexity);
6771 /* Choose the one with the best cost. */
6772 if (compare_costs (origcost, cost) <= 0)
6774 if (set)
6775 iv_ca_free (&set);
6776 set = origset;
6778 else if (origset)
6779 iv_ca_free (&origset);
6781 for (i = 0; i < n_iv_uses (data); i++)
6783 use = iv_use (data, i);
6784 use->selected = iv_ca_cand_for_use (set, use)->cand;
6787 return set;
6790 /* Creates a new induction variable corresponding to CAND. */
6792 static void
6793 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6795 gimple_stmt_iterator incr_pos;
6796 tree base;
6797 bool after = false;
6799 if (!cand->iv)
6800 return;
6802 switch (cand->pos)
6804 case IP_NORMAL:
6805 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6806 break;
6808 case IP_END:
6809 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6810 after = true;
6811 break;
6813 case IP_AFTER_USE:
6814 after = true;
6815 /* fall through */
6816 case IP_BEFORE_USE:
6817 incr_pos = gsi_for_stmt (cand->incremented_at);
6818 break;
6820 case IP_ORIGINAL:
6821 /* Mark that the iv is preserved. */
6822 name_info (data, cand->var_before)->preserve_biv = true;
6823 name_info (data, cand->var_after)->preserve_biv = true;
6825 /* Rewrite the increment so that it uses var_before directly. */
6826 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6827 return;
6830 gimple_add_tmp_var (cand->var_before);
6832 base = unshare_expr (cand->iv->base);
6834 create_iv (base, unshare_expr (cand->iv->step),
6835 cand->var_before, data->current_loop,
6836 &incr_pos, after, &cand->var_before, &cand->var_after);
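/* For reference, where the increment of the new iv ends up, based on
   the switch above:

     IP_NORMAL:        just before the exit test of the loop;
     IP_END:           at the end of the latch block;
     IP_BEFORE_USE and IP_AFTER_USE: next to CAND->incremented_at;
     IP_ORIGINAL:      the original increment is preserved and reused.  */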
6839 /* Creates new induction variables described in SET. */
6841 static void
6842 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6844 unsigned i;
6845 struct iv_cand *cand;
6846 bitmap_iterator bi;
6848 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6850 cand = iv_cand (data, i);
6851 create_new_iv (data, cand);
6854 if (dump_file && (dump_flags & TDF_DETAILS))
6856 fprintf (dump_file, "Selected IV set for loop %d",
6857 data->current_loop->num);
6858 if (data->loop_loc != UNKNOWN_LOCATION)
6859 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6860 LOCATION_LINE (data->loop_loc));
6861 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6862 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6864 cand = iv_cand (data, i);
6865 dump_cand (dump_file, cand);
6867 fprintf (dump_file, "\n");
6871 /* Rewrites USE (definition of iv used in a nonlinear expression)
6872 using candidate CAND. */
6874 static void
6875 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6876 struct iv_use *use, struct iv_cand *cand)
6878 tree comp;
6879 tree op, tgt;
6880 gassign *ass;
6881 gimple_stmt_iterator bsi;
6883 /* An important special case -- if we are asked to express the value of
6884 the original iv by itself, just exit; there is no need to
6885 introduce a new computation (that might also need casting the
6886 variable to unsigned and back). */
6887 if (cand->pos == IP_ORIGINAL
6888 && cand->incremented_at == use->stmt)
6890 enum tree_code stmt_code;
6892 gcc_assert (is_gimple_assign (use->stmt));
6893 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6895 /* Check whether we may leave the computation unchanged.
6896 This is the case only if it does not rely on other
6897 computations in the loop -- otherwise, the computation
6898 we rely upon may be removed in remove_unused_ivs,
6899 thus leading to an ICE. */
6900 stmt_code = gimple_assign_rhs_code (use->stmt);
6901 if (stmt_code == PLUS_EXPR
6902 || stmt_code == MINUS_EXPR
6903 || stmt_code == POINTER_PLUS_EXPR)
6905 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6906 op = gimple_assign_rhs2 (use->stmt);
6907 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6908 op = gimple_assign_rhs1 (use->stmt);
6909 else
6910 op = NULL_TREE;
6912 else
6913 op = NULL_TREE;
6915 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6916 return;
6919 comp = get_computation (data->current_loop, use, cand);
6920 gcc_assert (comp != NULL_TREE);
6922 switch (gimple_code (use->stmt))
6924 case GIMPLE_PHI:
6925 tgt = PHI_RESULT (use->stmt);
6927 /* If we should keep the biv, do not replace it. */
6928 if (name_info (data, tgt)->preserve_biv)
6929 return;
6931 bsi = gsi_after_labels (gimple_bb (use->stmt));
6932 break;
6934 case GIMPLE_ASSIGN:
6935 tgt = gimple_assign_lhs (use->stmt);
6936 bsi = gsi_for_stmt (use->stmt);
6937 break;
6939 default:
6940 gcc_unreachable ();
6943 if (!valid_gimple_rhs_p (comp)
6944 || (gimple_code (use->stmt) != GIMPLE_PHI
6945 /* We can't allow re-allocating the stmt as it might still
6946 be pointed to. */
6947 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6948 >= gimple_num_ops (gsi_stmt (bsi)))))
6950 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6951 true, GSI_SAME_STMT);
6952 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6954 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6955 /* As this isn't a plain copy we have to reset alignment
6956 information. */
6957 if (SSA_NAME_PTR_INFO (comp))
6958 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6962 if (gimple_code (use->stmt) == GIMPLE_PHI)
6964 ass = gimple_build_assign (tgt, comp);
6965 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6967 bsi = gsi_for_stmt (use->stmt);
6968 remove_phi_node (&bsi, false);
6970 else
6972 gimple_assign_set_rhs_from_tree (&bsi, comp);
6973 use->stmt = gsi_stmt (bsi);
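/* A made-up example of the rewrite above: if the value of the original
   iv is still needed, its definition

     i_3 = i_2 + 1;

   may be replaced by a computation based on the selected candidate, e.g.

     i_3 = (int) ivtmp_5;

   (iv candidates use unsigned types, hence the cast).  The IP_ORIGINAL
   special case at the top of the function instead keeps such an
   increment unchanged.  */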
6977 /* Performs a peephole optimization to reorder the iv update statement with
6978 a mem ref to enable instruction combining in later phases. The mem ref uses
6979 the iv value before the update, so the reordering transformation requires
6980 adjustment of the offset. CAND is the selected IV_CAND.
6982 Example:
6984 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6985 iv2 = iv1 + 1;
6987 if (t < val) (1)
6988 goto L;
6989 goto Head;
6992 Directly propagating t over to (1) will introduce an overlapping live range
6993 and thus increase register pressure. This peephole transforms it into:
6996 iv2 = iv1 + 1;
6997 t = MEM_REF (base, iv2, 8, 8);
6998 if (t < val)
6999 goto L;
7000 goto Head;
7003 static void
7004 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7006 tree var_after;
7007 gimple *iv_update, *stmt;
7008 basic_block bb;
7009 gimple_stmt_iterator gsi, gsi_iv;
7011 if (cand->pos != IP_NORMAL)
7012 return;
7014 var_after = cand->var_after;
7015 iv_update = SSA_NAME_DEF_STMT (var_after);
7017 bb = gimple_bb (iv_update);
7018 gsi = gsi_last_nondebug_bb (bb);
7019 stmt = gsi_stmt (gsi);
7021 /* Only handle conditional statements for now. */
7022 if (gimple_code (stmt) != GIMPLE_COND)
7023 return;
7025 gsi_prev_nondebug (&gsi);
7026 stmt = gsi_stmt (gsi);
7027 if (stmt != iv_update)
7028 return;
7030 gsi_prev_nondebug (&gsi);
7031 if (gsi_end_p (gsi))
7032 return;
7034 stmt = gsi_stmt (gsi);
7035 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7036 return;
7038 if (stmt != use->stmt)
7039 return;
7041 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7042 return;
7044 if (dump_file && (dump_flags & TDF_DETAILS))
7046 fprintf (dump_file, "Reordering \n");
7047 print_gimple_stmt (dump_file, iv_update, 0, 0);
7048 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7049 fprintf (dump_file, "\n");
7052 gsi = gsi_for_stmt (use->stmt);
7053 gsi_iv = gsi_for_stmt (iv_update);
7054 gsi_move_before (&gsi_iv, &gsi);
7056 cand->pos = IP_BEFORE_USE;
7057 cand->incremented_at = use->stmt;
7060 /* Rewrites USE (address that is an iv) using candidate CAND. */
7062 static void
7063 rewrite_use_address_1 (struct ivopts_data *data,
7064 struct iv_use *use, struct iv_cand *cand)
7066 aff_tree aff;
7067 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7068 tree base_hint = NULL_TREE;
7069 tree ref, iv;
7070 bool ok;
7072 adjust_iv_update_pos (cand, use);
7073 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7074 gcc_assert (ok);
7075 unshare_aff_combination (&aff);
7077 /* To avoid undefined overflow problems, all IV candidates use unsigned
7078 integer types. The drawback is that this makes it impossible for
7079 create_mem_ref to distinguish an IV that is based on a memory object
7080 from one that represents simply an offset.
7082 To work around this problem, we pass a hint to create_mem_ref that
7083 indicates which variable (if any) in aff is an IV based on a memory
7084 object. Note that we only consider the candidate. If this is not
7085 based on an object, the base of the reference is in some subexpression
7086 of the use -- but these will use pointer types, so they are recognized
7087 by the create_mem_ref heuristics anyway. */
7088 if (cand->iv->base_object)
7089 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7091 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7092 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7093 reference_alias_ptr_type (*use->op_p),
7094 iv, base_hint, data->speed);
7095 copy_ref_info (ref, *use->op_p);
7096 *use->op_p = ref;
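/* For instance (made-up names): for a use *p_7 with p_7 = base_3 + i_2 * 4
   and a selected unsigned candidate ivtmp_5 based on the object base_3,
   ivtmp_5 is passed as both IV and BASE_HINT, so create_mem_ref can build
   something like MEM[(int *) ivtmp_5] instead of treating ivtmp_5 as a
   mere offset.  */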
7099 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7100 first use of a group, rewrites sub uses in the group too. */
7102 static void
7103 rewrite_use_address (struct ivopts_data *data,
7104 struct iv_use *use, struct iv_cand *cand)
7106 struct iv_use *next;
7108 gcc_assert (use->sub_id == 0);
7109 rewrite_use_address_1 (data, use, cand);
7110 update_stmt (use->stmt);
7112 for (next = use->next; next != NULL; next = next->next)
7114 rewrite_use_address_1 (data, next, cand);
7115 update_stmt (next->stmt);
7118 return;
7121 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7122 candidate CAND. */
7124 static void
7125 rewrite_use_compare (struct ivopts_data *data,
7126 struct iv_use *use, struct iv_cand *cand)
7128 tree comp, *var_p, op, bound;
7129 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7130 enum tree_code compare;
7131 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7132 bool ok;
7134 bound = cp->value;
7135 if (bound)
7137 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7138 tree var_type = TREE_TYPE (var);
7139 gimple_seq stmts;
7141 if (dump_file && (dump_flags & TDF_DETAILS))
7143 fprintf (dump_file, "Replacing exit test: ");
7144 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7146 compare = cp->comp;
7147 bound = unshare_expr (fold_convert (var_type, bound));
7148 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7149 if (stmts)
7150 gsi_insert_seq_on_edge_immediate (
7151 loop_preheader_edge (data->current_loop),
7152 stmts);
7154 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7155 gimple_cond_set_lhs (cond_stmt, var);
7156 gimple_cond_set_code (cond_stmt, compare);
7157 gimple_cond_set_rhs (cond_stmt, op);
7158 return;
7161 /* The induction variable elimination failed; just express the original
7162 giv. */
7163 comp = get_computation (data->current_loop, use, cand);
7164 gcc_assert (comp != NULL_TREE);
7166 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7167 gcc_assert (ok);
7169 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7170 true, GSI_SAME_STMT);
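/* A made-up example of the exit-test replacement above: with a selected
   candidate ivtmp_5 and a precomputed BOUND, the test

     if (i_2 < n_4)

   may become

     if (ivtmp_5 != bound_6)

   where bound_6 is computed once in the loop preheader.  When no BOUND
   is available, the comparison operand is merely re-expressed in terms
   of the candidate.  */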
7173 /* Rewrites USE using candidate CAND. */
7175 static void
7176 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7178 switch (use->type)
7180 case USE_NONLINEAR_EXPR:
7181 rewrite_use_nonlinear_expr (data, use, cand);
7182 break;
7184 case USE_ADDRESS:
7185 rewrite_use_address (data, use, cand);
7186 break;
7188 case USE_COMPARE:
7189 rewrite_use_compare (data, use, cand);
7190 break;
7192 default:
7193 gcc_unreachable ();
7196 update_stmt (use->stmt);
7199 /* Rewrite the uses using the selected induction variables. */
7201 static void
7202 rewrite_uses (struct ivopts_data *data)
7204 unsigned i;
7205 struct iv_cand *cand;
7206 struct iv_use *use;
7208 for (i = 0; i < n_iv_uses (data); i++)
7210 use = iv_use (data, i);
7211 cand = use->selected;
7212 gcc_assert (cand);
7214 rewrite_use (data, use, cand);
7218 /* Removes the ivs that are not used after rewriting. */
7220 static void
7221 remove_unused_ivs (struct ivopts_data *data)
7223 unsigned j;
7224 bitmap_iterator bi;
7225 bitmap toremove = BITMAP_ALLOC (NULL);
7227 /* Figure out an order in which to release SSA DEFs so that we don't
7228 release something that we'd have to propagate into a debug stmt
7229 afterwards. */
7230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7232 struct version_info *info;
7234 info = ver_info (data, j);
7235 if (info->iv
7236 && !integer_zerop (info->iv->step)
7237 && !info->inv_id
7238 && !info->iv->have_use_for
7239 && !info->preserve_biv)
7241 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7243 tree def = info->iv->ssa_name;
7245 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7247 imm_use_iterator imm_iter;
7248 use_operand_p use_p;
7249 gimple *stmt;
7250 int count = 0;
7252 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7254 if (!gimple_debug_bind_p (stmt))
7255 continue;
7257 /* We just want to determine whether to do nothing
7258 (count == 0), to substitute the computed
7259 expression into a single use of the SSA DEF by
7260 itself (count == 1), or to use a debug temp
7261 because the SSA DEF is used multiple times or as
7262 part of a larger expression (count > 1). */
7263 count++;
7264 if (gimple_debug_bind_get_value (stmt) != def)
7265 count++;
7267 if (count > 1)
7268 BREAK_FROM_IMM_USE_STMT (imm_iter);
7271 if (!count)
7272 continue;
7274 struct iv_use dummy_use;
7275 struct iv_cand *best_cand = NULL, *cand;
7276 unsigned i, best_pref = 0, cand_pref;
7278 memset (&dummy_use, 0, sizeof (dummy_use));
7279 dummy_use.iv = info->iv;
7280 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7282 cand = iv_use (data, i)->selected;
7283 if (cand == best_cand)
7284 continue;
7285 cand_pref = operand_equal_p (cand->iv->step,
7286 info->iv->step, 0)
7287 ? 4 : 0;
7288 cand_pref
7289 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7290 == TYPE_MODE (TREE_TYPE (info->iv->base))
7291 ? 2 : 0;
7292 cand_pref
7293 += TREE_CODE (cand->iv->base) == INTEGER_CST
7294 ? 1 : 0;
7295 if (best_cand == NULL || best_pref < cand_pref)
7297 best_cand = cand;
7298 best_pref = cand_pref;
7302 if (!best_cand)
7303 continue;
7305 tree comp = get_computation_at (data->current_loop,
7306 &dummy_use, best_cand,
7307 SSA_NAME_DEF_STMT (def));
7308 if (!comp)
7309 continue;
7311 if (count > 1)
7313 tree vexpr = make_node (DEBUG_EXPR_DECL);
7314 DECL_ARTIFICIAL (vexpr) = 1;
7315 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7316 if (SSA_NAME_VAR (def))
7317 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7318 else
7319 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7320 gdebug *def_temp
7321 = gimple_build_debug_bind (vexpr, comp, NULL);
7322 gimple_stmt_iterator gsi;
7324 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7325 gsi = gsi_after_labels (gimple_bb
7326 (SSA_NAME_DEF_STMT (def)));
7327 else
7328 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7330 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7331 comp = vexpr;
7334 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7336 if (!gimple_debug_bind_p (stmt))
7337 continue;
7339 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7340 SET_USE (use_p, comp);
7342 update_stmt (stmt);
7348 release_defs_bitset (toremove);
7350 BITMAP_FREE (toremove);
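/* Debug-info sketch (made-up SSA names): if the removed iv i_3 is still
   referenced by several debug binds, the code above introduces a debug
   temporary

     # DEBUG D#1 => (int) ivtmp_5
     # DEBUG i => D#1

   so that the value of i remains computable in the debugger; with a
   single debug use, the computed expression is substituted directly.  */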
7353 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7354 for hash_map::traverse. */
7356 bool
7357 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7359 free (value);
7360 return true;
7363 /* Frees data allocated by the optimization of a single loop. */
7365 static void
7366 free_loop_data (struct ivopts_data *data)
7368 unsigned i, j;
7369 bitmap_iterator bi;
7370 tree obj;
7372 if (data->niters)
7374 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7375 delete data->niters;
7376 data->niters = NULL;
7379 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7381 struct version_info *info;
7383 info = ver_info (data, i);
7384 info->iv = NULL;
7385 info->has_nonlin_use = false;
7386 info->preserve_biv = false;
7387 info->inv_id = 0;
7389 bitmap_clear (data->relevant);
7390 bitmap_clear (data->important_candidates);
7392 for (i = 0; i < n_iv_uses (data); i++)
7394 struct iv_use *use = iv_use (data, i);
7395 struct iv_use *pre = use, *sub = use->next;
7397 while (sub)
7399 gcc_assert (sub->related_cands == NULL);
7400 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7402 pre = sub;
7403 sub = sub->next;
7404 free (pre);
7407 BITMAP_FREE (use->related_cands);
7408 for (j = 0; j < use->n_map_members; j++)
7409 if (use->cost_map[j].depends_on)
7410 BITMAP_FREE (use->cost_map[j].depends_on);
7411 free (use->cost_map);
7412 free (use);
7414 data->iv_uses.truncate (0);
7416 for (i = 0; i < n_iv_cands (data); i++)
7418 struct iv_cand *cand = iv_cand (data, i);
7420 if (cand->depends_on)
7421 BITMAP_FREE (cand->depends_on);
7422 free (cand);
7424 data->iv_candidates.truncate (0);
7426 if (data->version_info_size < num_ssa_names)
7428 data->version_info_size = 2 * num_ssa_names;
7429 free (data->version_info);
7430 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7433 data->max_inv_id = 0;
7435 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7436 SET_DECL_RTL (obj, NULL_RTX);
7438 decl_rtl_to_reset.truncate (0);
7440 data->inv_expr_tab->empty ();
7441 data->inv_expr_id = 0;
7444 /* Finalizes the data structures used by the iv optimization pass. */
7447 static void
7448 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7450 free_loop_data (data);
7451 free (data->version_info);
7452 BITMAP_FREE (data->relevant);
7453 BITMAP_FREE (data->important_candidates);
7455 decl_rtl_to_reset.release ();
7456 data->iv_uses.release ();
7457 data->iv_candidates.release ();
7458 delete data->inv_expr_tab;
7459 data->inv_expr_tab = NULL;
7460 free_affine_expand_cache (&data->name_expansion_cache);
7461 obstack_free (&data->iv_obstack, NULL);
7464 /* Returns true if the loop body BODY includes any function calls. */
7466 static bool
7467 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7469 gimple_stmt_iterator gsi;
7470 unsigned i;
7472 for (i = 0; i < num_nodes; i++)
7473 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7475 gimple *stmt = gsi_stmt (gsi);
7476 if (is_gimple_call (stmt)
7477 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7478 return true;
7480 return false;
7483 /* Optimizes the LOOP. Returns true if anything changed. */
7485 static bool
7486 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7488 bool changed = false;
7489 struct iv_ca *iv_ca;
7490 edge exit = single_dom_exit (loop);
7491 basic_block *body;
7493 gcc_assert (!data->niters);
7494 data->current_loop = loop;
7495 data->loop_loc = find_loop_location (loop);
7496 data->speed = optimize_loop_for_speed_p (loop);
7498 if (dump_file && (dump_flags & TDF_DETAILS))
7500 fprintf (dump_file, "Processing loop %d", loop->num);
7501 if (data->loop_loc != UNKNOWN_LOCATION)
7502 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7503 LOCATION_LINE (data->loop_loc));
7504 fprintf (dump_file, "\n");
7506 if (exit)
7508 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7509 exit->src->index, exit->dest->index);
7510 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7511 fprintf (dump_file, "\n");
7514 fprintf (dump_file, "\n");
7517 body = get_loop_body (loop);
7518 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7519 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7520 free (body);
7522 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7524 /* For each ssa name, determine whether it behaves as an induction
7525 variable in some loop. */
7526 if (!find_induction_variables (data))
7527 goto finish;
7529 /* Finds interesting uses (item 1). */
7530 find_interesting_uses (data);
7531 group_address_uses (data);
7532 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7533 goto finish;
7535 /* Finds candidates for the induction variables (item 2). */
7536 find_iv_candidates (data);
7538 /* Calculates the costs (item 3, part 1). */
7539 determine_iv_costs (data);
7540 determine_use_iv_costs (data);
7541 determine_set_costs (data);
7543 /* Find the optimal set of induction variables (item 3, part 2). */
7544 iv_ca = find_optimal_iv_set (data);
7545 if (!iv_ca)
7546 goto finish;
7547 changed = true;
7549 /* Create the new induction variables (item 4, part 1). */
7550 create_new_ivs (data, iv_ca);
7551 iv_ca_free (&iv_ca);
7553 /* Rewrite the uses (item 4, part 2). */
7554 rewrite_uses (data);
7556 /* Remove the ivs that are unused after rewriting. */
7557 remove_unused_ivs (data);
7559 /* We have changed the structure of induction variables; it might happen
7560 that definitions in the scev database refer to some of them that were
7561 eliminated. */
7562 scev_reset ();
7564 finish:
7565 free_loop_data (data);
7567 return changed;
7570 /* Main entry point. Optimizes induction variables in loops. */
7572 void
7573 tree_ssa_iv_optimize (void)
7575 struct loop *loop;
7576 struct ivopts_data data;
7578 tree_ssa_iv_optimize_init (&data);
7580 /* Optimize the loops starting with the innermost ones. */
7581 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7583 if (dump_file && (dump_flags & TDF_DETAILS))
7584 flow_loop_dump (loop, dump_file, NULL, 1);
7586 tree_ssa_iv_optimize_loop (&data, loop);
7589 tree_ssa_iv_optimize_finalize (&data);