/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
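
/* As an illustration, strength reduction may rewrite a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   so that the address is itself kept as an induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   trading the multiplication hidden in the address computation of a[i]
   for a simple pointer increment, provided the cost model below finds
   this profitable on the target.  */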
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
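
/* For instance, for the counter of a loop "for (i = 0; i < n; i += 4)",
   the iv describing i has base 0 and step 4; as i is incremented
   directly in the loop, it is also a biv.  */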

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
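
/* Example: for EXPR = &a[i] (an ADDR_EXPR), the base object is &a,
   whereas an integer constant cast to a pointer yields NULL_TREE,
   since it points to no tracked object.  */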

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
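
/* For instance, for a biv described by

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   determine_biv_step returns the step 1.  */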

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
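
/* E.g. with the biv i_1 above stepping by 1, an assignment such as

     j_2 = i_1 * 4 + 8;

   defines a giv with base 8 and step 4, which simple_iv recognizes
   through the scalar evolution machinery.  */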

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
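
/* E.g. for a condition "i_1 < n_2" where i_1 is an iv with nonzero step
   and n_2 is loop invariant, CONTROL_VAR is pointed at i_1 and BOUND at
   n_2, and true is returned; the operands are swapped first if the iv
   happens to be on the right-hand side of the comparison.  */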

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
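
/* E.g. for a reference a[i] with 4-byte elements where i is an iv with
   step 2, idx_find_step replaces the index by the initial value of i
   and accumulates 4 * 2 = 8 bytes into DTA->step.  */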

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
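
/* Examples: constant_multiple_of (x_1 * 6, x_1, &mul) stores 6 to MUL
   and returns true, as does constant_multiple_of (12, 4, &mul) with 3;
   constant_multiple_of (13, 4, &mul) fails because the division leaves
   a remainder.  */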

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_fits_shwi_p (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_fits_shwi_p (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_fits_shwi_p (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_fits_shwi_p (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
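
/* E.g. stripping p_1 + 20 yields p_1 with OFFSET set to 20, and
   stripping &a[4] (with 4-byte elements) yields &a with OFFSET set
   to 16.  */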

/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
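
/* E.g. int is mapped to unsigned int, and char * to an unsigned
   integer type of the same precision, so that overflow in the computed
   ivs has well-defined semantics.  */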

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2216 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2217 position to POS. If USE is not NULL, the candidate is set as related to
2218 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2219 replacement of the final value of the iv by a direct computation. */
2221 static struct iv_cand *
2222 add_candidate_1 (struct ivopts_data *data,
2223 tree base, tree step, bool important, enum iv_position pos,
2224 struct iv_use *use, gimple incremented_at)
2226 unsigned i;
2227 struct iv_cand *cand = NULL;
2228 tree type, orig_type;
2230 /* For non-original variables, make sure their values are computed in a type
2231 that does not invoke undefined behavior on overflows (since in general,
2232 we cannot prove that these induction variables are non-wrapping). */
2233 if (pos != IP_ORIGINAL)
2235 orig_type = TREE_TYPE (base);
2236 type = generic_type_for (orig_type);
2237 if (type != orig_type)
2239 base = fold_convert (type, base);
2240 step = fold_convert (type, step);
2244 for (i = 0; i < n_iv_cands (data); i++)
2246 cand = iv_cand (data, i);
2248 if (cand->pos != pos)
2249 continue;
2251 if (cand->incremented_at != incremented_at
2252 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2253 && cand->ainc_use != use))
2254 continue;
2256 if (!cand->iv)
2258 if (!base && !step)
2259 break;
2261 continue;
2264 if (!base && !step)
2265 continue;
2267 if (operand_equal_p (base, cand->iv->base, 0)
2268 && operand_equal_p (step, cand->iv->step, 0)
2269 && (TYPE_PRECISION (TREE_TYPE (base))
2270 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2271 break;
2274 if (i == n_iv_cands (data))
2276 cand = XCNEW (struct iv_cand);
2277 cand->id = i;
2279 if (!base && !step)
2280 cand->iv = NULL;
2281 else
2282 cand->iv = alloc_iv (base, step);
2284 cand->pos = pos;
2285 if (pos != IP_ORIGINAL && cand->iv)
2287 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2288 cand->var_after = cand->var_before;
2290 cand->important = important;
2291 cand->incremented_at = incremented_at;
2292 data->iv_candidates.safe_push (cand);
2294 if (step
2295 && TREE_CODE (step) != INTEGER_CST)
2297 fd_ivopts_data = data;
2298 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2301 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2302 cand->ainc_use = use;
2303 else
2304 cand->ainc_use = NULL;
2306 if (dump_file && (dump_flags & TDF_DETAILS))
2307 dump_cand (dump_file, cand);
2310 if (important && !cand->important)
2312 cand->important = true;
2313 if (dump_file && (dump_flags & TDF_DETAILS))
2314 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2317 if (use)
2319 bitmap_set_bit (use->related_cands, i);
2320 if (dump_file && (dump_flags & TDF_DETAILS))
2321 fprintf (dump_file, "Candidate %d is related to use %d\n",
2322 cand->id, use->id);
2325 return cand;
2328 /* Returns true if incrementing the induction variable at the end of the LOOP
2329 is allowed.
2331 The purpose is to avoid splitting the latch edge with a biv increment, thus
2332 creating a jump, possibly confusing other optimization passes and leaving
2333 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2334 is not available (so we do not have a better alternative), or if the latch
2335 edge is already nonempty. */
2337 static bool
2338 allow_ip_end_pos_p (struct loop *loop)
2340 if (!ip_normal_pos (loop))
2341 return true;
2343 if (!empty_block_p (ip_end_pos (loop)))
2344 return true;
2346 return false;
2349 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2350 Important field is set to IMPORTANT. */
2352 static void
2353 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2354 bool important, struct iv_use *use)
2356 basic_block use_bb = gimple_bb (use->stmt);
2357 enum machine_mode mem_mode;
2358 unsigned HOST_WIDE_INT cstepi;
2360 /* If we insert the increment in any position other than the standard
2361 ones, we must ensure that it is incremented once per iteration.
2362 It must not be in an inner nested loop, or one side of an if
2363 statement. */
2364 if (use_bb->loop_father != data->current_loop
2365 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2366 || stmt_could_throw_p (use->stmt)
2367 || !cst_fits_shwi_p (step))
2368 return;
2370 cstepi = int_cst_value (step);
2372 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2373 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2374 || USE_STORE_PRE_INCREMENT (mem_mode))
2375 && GET_MODE_SIZE (mem_mode) == cstepi)
2376 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2377 || USE_STORE_PRE_DECREMENT (mem_mode))
2378 && GET_MODE_SIZE (mem_mode) == -cstepi))
2380 enum tree_code code = MINUS_EXPR;
2381 tree new_base;
2382 tree new_step = step;
2384 if (POINTER_TYPE_P (TREE_TYPE (base)))
2386 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2387 code = POINTER_PLUS_EXPR;
2389 else
2390 new_step = fold_convert (TREE_TYPE (base), new_step);
2391 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2392 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2393 use->stmt);
2395 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2396 || USE_STORE_POST_INCREMENT (mem_mode))
2397 && GET_MODE_SIZE (mem_mode) == cstepi)
2398 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2399 || USE_STORE_POST_DECREMENT (mem_mode))
2400 && GET_MODE_SIZE (mem_mode) == -cstepi))
2402 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2403 use->stmt);
2407 /* Adds a candidate BASE + STEP * i. The important field is set to
2408 IMPORTANT. If USE is not NULL, the candidate is set as related to
2409 it. The candidate computation is scheduled on all available positions. */
2411 static void
2412 add_candidate (struct ivopts_data *data,
2413 tree base, tree step, bool important, struct iv_use *use)
2415 if (ip_normal_pos (data->current_loop))
2416 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2417 if (ip_end_pos (data->current_loop)
2418 && allow_ip_end_pos_p (data->current_loop))
2419 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2421 if (use != NULL && use->type == USE_ADDRESS)
2422 add_autoinc_candidates (data, base, step, important, use);
2425 /* Adds standard iv candidates. */
2427 static void
2428 add_standard_iv_candidates (struct ivopts_data *data)
2430 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2432 /* The same for the wider 'long' type if it is still fast enough. */
2433 if (TYPE_PRECISION
2434 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2435 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2436 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2437 build_int_cst (long_integer_type_node, 1), true, NULL);
2439 /* Likewise for 'long long' if it is still fast enough. */
2440 if (TYPE_PRECISION
2441 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2442 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2443 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2444 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2448 /* Adds candidates based on the old induction variable IV. */
2450 static void
2451 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2453 gimple phi;
2454 tree def;
2455 struct iv_cand *cand;
2457 add_candidate (data, iv->base, iv->step, true, NULL);
2459 /* The same, but with initial value zero. */
2460 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2461 add_candidate (data, size_int (0), iv->step, true, NULL);
2462 else
2463 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2464 iv->step, true, NULL);
2466 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2467 if (gimple_code (phi) == GIMPLE_PHI)
2469 /* Additionally record the possibility of leaving the original iv
2470 untouched. */
2471 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2472 cand = add_candidate_1 (data,
2473 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2474 SSA_NAME_DEF_STMT (def));
2475 cand->var_before = iv->ssa_name;
2476 cand->var_after = def;
2480 /* Adds candidates based on the old induction variables. */
2482 static void
2483 add_old_ivs_candidates (struct ivopts_data *data)
2485 unsigned i;
2486 struct iv *iv;
2487 bitmap_iterator bi;
2489 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2491 iv = ver_info (data, i)->iv;
2492 if (iv && iv->biv_p && !integer_zerop (iv->step))
2493 add_old_iv_candidates (data, iv);
2497 /* Adds candidates based on the value of the induction variable IV and USE. */
2499 static void
2500 add_iv_value_candidates (struct ivopts_data *data,
2501 struct iv *iv, struct iv_use *use)
2503 unsigned HOST_WIDE_INT offset;
2504 tree base;
2505 tree basetype;
2507 add_candidate (data, iv->base, iv->step, false, use);
2509 /* The same, but with initial value zero. Make such variable important,
2510 since it is generic enough so that possibly many uses may be based
2511 on it. */
2512 basetype = TREE_TYPE (iv->base);
2513 if (POINTER_TYPE_P (basetype))
2514 basetype = sizetype;
2515 add_candidate (data, build_int_cst (basetype, 0),
2516 iv->step, true, use);
2518 /* Third, try removing the constant offset. Make sure to even
2519 add a candidate for &a[0] vs. (T *)&a. */
2520 base = strip_offset (iv->base, &offset);
2521 if (offset
2522 || base != iv->base)
2523 add_candidate (data, base, iv->step, false, use);
2526 /* Adds candidates based on the uses. */
2528 static void
2529 add_derived_ivs_candidates (struct ivopts_data *data)
2531 unsigned i;
2533 for (i = 0; i < n_iv_uses (data); i++)
2535 struct iv_use *use = iv_use (data, i);
2537 if (!use)
2538 continue;
2540 switch (use->type)
2542 case USE_NONLINEAR_EXPR:
2543 case USE_COMPARE:
2544 case USE_ADDRESS:
2545 /* Just add the ivs based on the value of the iv used here. */
2546 add_iv_value_candidates (data, use->iv, use);
2547 break;
2549 default:
2550 gcc_unreachable ();
2555 /* Record important candidates and add them to related_cands bitmaps
2556 if needed. */
2558 static void
2559 record_important_candidates (struct ivopts_data *data)
2561 unsigned i;
2562 struct iv_use *use;
2564 for (i = 0; i < n_iv_cands (data); i++)
2566 struct iv_cand *cand = iv_cand (data, i);
2568 if (cand->important)
2569 bitmap_set_bit (data->important_candidates, i);
2572 data->consider_all_candidates = (n_iv_cands (data)
2573 <= CONSIDER_ALL_CANDIDATES_BOUND);
2575 if (data->consider_all_candidates)
2577 /* We will not need "related_cands" bitmaps in this case,
2578 so release them to decrease peak memory consumption. */
2579 for (i = 0; i < n_iv_uses (data); i++)
2581 use = iv_use (data, i);
2582 BITMAP_FREE (use->related_cands);
2585 else
2587 /* Add important candidates to the related_cands bitmaps. */
2588 for (i = 0; i < n_iv_uses (data); i++)
2589 bitmap_ior_into (iv_use (data, i)->related_cands,
2590 data->important_candidates);
2594 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2595 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2596 we allocate a simple list to every use. */
2598 static void
2599 alloc_use_cost_map (struct ivopts_data *data)
2601 unsigned i, size, s;
2603 for (i = 0; i < n_iv_uses (data); i++)
2605 struct iv_use *use = iv_use (data, i);
2607 if (data->consider_all_candidates)
2608 size = n_iv_cands (data);
2609 else
2611 s = bitmap_count_bits (use->related_cands);
2613 /* Round up to the nearest power of two, so that computing the modulo is fast. */
2614 size = s ? (1 << ceil_log2 (s)) : 1;
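/* E.g. five related candidates round up to size == 8, so that
   "cand->id & (size - 1)" can later be used as a cheap modulo when
   probing the cost map.  */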
2617 use->n_map_members = size;
2618 use->cost_map = XCNEWVEC (struct cost_pair, size);
2622 /* Returns a description of the computation cost of an expression whose
2623 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2625 static comp_cost
2626 new_cost (unsigned runtime, unsigned complexity)
2628 comp_cost cost;
2633 cost.cost = runtime;
2634 cost.complexity = complexity;
2636 return cost;
2639 /* Adds costs COST1 and COST2. */
2641 static comp_cost
2642 add_costs (comp_cost cost1, comp_cost cost2)
2644 cost1.cost += cost2.cost;
2645 cost1.complexity += cost2.complexity;
2647 return cost1;
2649 /* Subtracts cost COST2 from COST1. */
2651 static comp_cost
2652 sub_costs (comp_cost cost1, comp_cost cost2)
2654 cost1.cost -= cost2.cost;
2655 cost1.complexity -= cost2.complexity;
2657 return cost1;
2660 /* Returns a negative number if COST1 < COST2, a positive number if
2661 COST1 > COST2, and 0 if COST1 = COST2. */
2663 static int
2664 compare_costs (comp_cost cost1, comp_cost cost2)
2666 if (cost1.cost == cost2.cost)
2667 return cost1.complexity - cost2.complexity;
2669 return cost1.cost - cost2.cost;
2672 /* Returns true if COST is infinite. */
2674 static bool
2675 infinite_cost_p (comp_cost cost)
2677 return cost.cost == INFTY;
2680 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2681 on invariants DEPENDS_ON and that the value used in expressing it
2682 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2684 static void
2685 set_use_iv_cost (struct ivopts_data *data,
2686 struct iv_use *use, struct iv_cand *cand,
2687 comp_cost cost, bitmap depends_on, tree value,
2688 enum tree_code comp, int inv_expr_id)
2690 unsigned i, s;
2692 if (infinite_cost_p (cost))
2694 BITMAP_FREE (depends_on);
2695 return;
2698 if (data->consider_all_candidates)
2700 use->cost_map[cand->id].cand = cand;
2701 use->cost_map[cand->id].cost = cost;
2702 use->cost_map[cand->id].depends_on = depends_on;
2703 use->cost_map[cand->id].value = value;
2704 use->cost_map[cand->id].comp = comp;
2705 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2706 return;
2709 /* n_map_members is a power of two, so this computes modulo. */
2710 s = cand->id & (use->n_map_members - 1);
2711 for (i = s; i < use->n_map_members; i++)
2712 if (!use->cost_map[i].cand)
2713 goto found;
2714 for (i = 0; i < s; i++)
2715 if (!use->cost_map[i].cand)
2716 goto found;
2718 gcc_unreachable ();
2720 found:
2721 use->cost_map[i].cand = cand;
2722 use->cost_map[i].cost = cost;
2723 use->cost_map[i].depends_on = depends_on;
2724 use->cost_map[i].value = value;
2725 use->cost_map[i].comp = comp;
2726 use->cost_map[i].inv_expr_id = inv_expr_id;
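/* The probing above is open addressing with linear search: we scan from
   the hash position S to the end of the table and then wrap around to
   the beginning.  A free slot always exists, since n_map_members is at
   least the number of related candidates.  */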
2729 /* Gets cost of (USE, CANDIDATE) pair. */
2731 static struct cost_pair *
2732 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2733 struct iv_cand *cand)
2735 unsigned i, s;
2736 struct cost_pair *ret;
2738 if (!cand)
2739 return NULL;
2741 if (data->consider_all_candidates)
2743 ret = use->cost_map + cand->id;
2744 if (!ret->cand)
2745 return NULL;
2747 return ret;
2750 /* n_map_members is a power of two, so this computes modulo. */
2751 s = cand->id & (use->n_map_members - 1);
2752 for (i = s; i < use->n_map_members; i++)
2753 if (use->cost_map[i].cand == cand)
2754 return use->cost_map + i;
2755 else if (use->cost_map[i].cand == NULL)
2756 return NULL;
2757 for (i = 0; i < s; i++)
2758 if (use->cost_map[i].cand == cand)
2759 return use->cost_map + i;
2760 else if (use->cost_map[i].cand == NULL)
2761 return NULL;
2763 return NULL;
2766 /* Returns estimate on cost of computing SEQ. */
2768 static unsigned
2769 seq_cost (rtx seq, bool speed)
2771 unsigned cost = 0;
2772 rtx set;
2774 for (; seq; seq = NEXT_INSN (seq))
2776 set = single_set (seq);
2777 if (set)
2778 cost += set_src_cost (SET_SRC (set), speed);
2779 else
2780 cost++;
2783 return cost;
2786 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2787 static rtx
2788 produce_memory_decl_rtl (tree obj, int *regno)
2790 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2791 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2792 rtx x;
2794 gcc_assert (obj);
2795 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2797 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2798 x = gen_rtx_SYMBOL_REF (address_mode, name);
2799 SET_SYMBOL_REF_DECL (x, obj);
2800 x = gen_rtx_MEM (DECL_MODE (obj), x);
2801 set_mem_addr_space (x, as);
2802 targetm.encode_section_info (obj, x, true);
2804 else
2806 x = gen_raw_REG (address_mode, (*regno)++);
2807 x = gen_rtx_MEM (DECL_MODE (obj), x);
2808 set_mem_addr_space (x, as);
2811 return x;
2814 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2815 walk_tree. DATA contains the actual fake register number. */
2817 static tree
2818 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2820 tree obj = NULL_TREE;
2821 rtx x = NULL_RTX;
2822 int *regno = (int *) data;
2824 switch (TREE_CODE (*expr_p))
2826 case ADDR_EXPR:
2827 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2828 handled_component_p (*expr_p);
2829 expr_p = &TREE_OPERAND (*expr_p, 0))
2830 continue;
2831 obj = *expr_p;
2832 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2833 x = produce_memory_decl_rtl (obj, regno);
2834 break;
2836 case SSA_NAME:
2837 *ws = 0;
2838 obj = SSA_NAME_VAR (*expr_p);
2839 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2840 if (!obj)
2841 return NULL_TREE;
2842 if (!DECL_RTL_SET_P (obj))
2843 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2844 break;
2846 case VAR_DECL:
2847 case PARM_DECL:
2848 case RESULT_DECL:
2849 *ws = 0;
2850 obj = *expr_p;
2852 if (DECL_RTL_SET_P (obj))
2853 break;
2855 if (DECL_MODE (obj) == BLKmode)
2856 x = produce_memory_decl_rtl (obj, regno);
2857 else
2858 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2860 break;
2862 default:
2863 break;
2866 if (x)
2868 decl_rtl_to_reset.safe_push (obj);
2869 SET_DECL_RTL (obj, x);
2872 return NULL_TREE;
2875 /* Determines cost of the computation of EXPR. */
2877 static unsigned
2878 computation_cost (tree expr, bool speed)
2880 rtx seq, rslt;
2881 tree type = TREE_TYPE (expr);
2882 unsigned cost;
2883 /* Avoid using hard regs in ways which may be unsupported. */
2884 int regno = LAST_VIRTUAL_REGISTER + 1;
2885 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2886 enum node_frequency real_frequency = node->frequency;
2888 node->frequency = NODE_FREQUENCY_NORMAL;
2889 crtl->maybe_hot_insn_p = speed;
2890 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2891 start_sequence ();
2892 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2893 seq = get_insns ();
2894 end_sequence ();
2895 default_rtl_profile ();
2896 node->frequency = real_frequency;
2898 cost = seq_cost (seq, speed);
2899 if (MEM_P (rslt))
2900 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2901 TYPE_ADDR_SPACE (type), speed);
2902 else if (!REG_P (rslt))
2903 cost += set_src_cost (rslt, speed);
2905 return cost;
2908 /* Returns variable containing the value of candidate CAND at statement AT. */
2910 static tree
2911 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2913 if (stmt_after_increment (loop, cand, stmt))
2914 return cand->var_after;
2915 else
2916 return cand->var_before;
2919 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2920 same precision, which is at least as wide as the precision of TYPE, stores
2921 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2922 type of A and B. */
2924 static tree
2925 determine_common_wider_type (tree *a, tree *b)
2927 tree wider_type = NULL;
2928 tree suba, subb;
2929 tree atype = TREE_TYPE (*a);
2931 if (CONVERT_EXPR_P (*a))
2933 suba = TREE_OPERAND (*a, 0);
2934 wider_type = TREE_TYPE (suba);
2935 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2936 return atype;
2938 else
2939 return atype;
2941 if (CONVERT_EXPR_P (*b))
2943 subb = TREE_OPERAND (*b, 0);
2944 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2945 return atype;
2947 else
2948 return atype;
2950 *a = suba;
2951 *b = subb;
2952 return wider_type;
2955 /* Determines the expression by which USE is expressed from induction variable
2956 CAND at statement AT in LOOP. The expression is stored in a decomposed
2957 form into AFF. Returns false if USE cannot be expressed using CAND. */
2959 static bool
2960 get_computation_aff (struct loop *loop,
2961 struct iv_use *use, struct iv_cand *cand, gimple at,
2962 struct affine_tree_combination *aff)
2964 tree ubase = use->iv->base;
2965 tree ustep = use->iv->step;
2966 tree cbase = cand->iv->base;
2967 tree cstep = cand->iv->step, cstep_common;
2968 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2969 tree common_type, var;
2970 tree uutype;
2971 aff_tree cbase_aff, var_aff;
2972 widest_int rat;
2974 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2976 /* We do not have enough precision to express the values of USE. */
2977 return false;
2980 var = var_at_stmt (loop, cand, at);
2981 uutype = unsigned_type_for (utype);
2983 /* If the conversion is not noop, perform it. */
2984 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2986 cstep = fold_convert (uutype, cstep);
2987 cbase = fold_convert (uutype, cbase);
2988 var = fold_convert (uutype, var);
2991 if (!constant_multiple_of (ustep, cstep, &rat))
2992 return false;
2994 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2995 type, we achieve better folding by computing their difference in this
2996 wider type, and casting the result to UUTYPE. We do not need to worry about
2997 overflows, as all the arithmetic will in the end be performed in UUTYPE
2998 anyway. */
2999 common_type = determine_common_wider_type (&ubase, &cbase);
3001 /* use = ubase - ratio * cbase + ratio * var. */
3002 tree_to_aff_combination (ubase, common_type, aff);
3003 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3004 tree_to_aff_combination (var, uutype, &var_aff);
3006 /* We need to shift the value if we are after the increment. */
3007 if (stmt_after_increment (loop, cand, at))
3009 aff_tree cstep_aff;
3011 if (common_type != uutype)
3012 cstep_common = fold_convert (common_type, cstep);
3013 else
3014 cstep_common = cstep;
3016 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3017 aff_combination_add (&cbase_aff, &cstep_aff);
3020 aff_combination_scale (&cbase_aff, -rat);
3021 aff_combination_add (aff, &cbase_aff);
3022 if (common_type != uutype)
3023 aff_combination_convert (aff, uutype);
3025 aff_combination_scale (&var_aff, rat);
3026 aff_combination_add (aff, &var_aff);
3028 return true;
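/* Worked example (illustrative): if USE has step 8 and CAND has step 4,
   then RAT == 2 and the combination built above represents
   use = ubase - 2 * cbase + 2 * var, evaluated in UUTYPE.  */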
3031 /* Return the type of USE. */
3033 static tree
3034 get_use_type (struct iv_use *use)
3036 tree base_type = TREE_TYPE (use->iv->base);
3037 tree type;
3039 if (use->type == USE_ADDRESS)
3041 /* The base_type may be a void pointer. Create a pointer type based on
3042 the mem_ref instead. */
3043 type = build_pointer_type (TREE_TYPE (*use->op_p));
3044 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3045 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3047 else
3048 type = base_type;
3050 return type;
3053 /* Determines the expression by which USE is expressed from induction variable
3054 CAND at statement AT in LOOP. The computation is unshared. */
3056 static tree
3057 get_computation_at (struct loop *loop,
3058 struct iv_use *use, struct iv_cand *cand, gimple at)
3060 aff_tree aff;
3061 tree type = get_use_type (use);
3063 if (!get_computation_aff (loop, use, cand, at, &aff))
3064 return NULL_TREE;
3065 unshare_aff_combination (&aff);
3066 return fold_convert (type, aff_combination_to_tree (&aff));
3069 /* Determines the expression by which USE is expressed from induction variable
3070 CAND in LOOP. The computation is unshared. */
3072 static tree
3073 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3075 return get_computation_at (loop, use, cand, use->stmt);
3078 /* Adjust the cost COST for being in loop setup rather than loop body.
3079 If we're optimizing for space, the loop setup overhead is constant;
3080 if we're optimizing for speed, amortize it over the per-iteration cost. */
3081 static unsigned
3082 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3084 if (cost == INFTY)
3085 return cost;
3086 else if (optimize_loop_for_speed_p (data->current_loop))
3087 return cost / avg_loop_niter (data->current_loop);
3088 else
3089 return cost;
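/* E.g. a setup cost of 8 with avg_loop_niter == 4 is amortized to 2 when
   optimizing for speed; when optimizing for size the full cost of 8 is
   kept, since the setup code is emitted only once.  */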
3092 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3093 validity for a memory reference accessing memory of mode MODE in
3094 address space AS. */
3097 bool
3098 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3099 addr_space_t as)
3101 #define MAX_RATIO 128
3102 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3103 static vec<sbitmap> valid_mult_list;
3104 sbitmap valid_mult;
3106 if (data_index >= valid_mult_list.length ())
3107 valid_mult_list.safe_grow_cleared (data_index + 1);
3109 valid_mult = valid_mult_list[data_index];
3110 if (!valid_mult)
3112 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3113 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3114 rtx addr;
3115 HOST_WIDE_INT i;
3117 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3118 bitmap_clear (valid_mult);
3119 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3120 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3122 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3123 if (memory_address_addr_space_p (mode, addr, as))
3124 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3127 if (dump_file && (dump_flags & TDF_DETAILS))
3129 fprintf (dump_file, " allowed multipliers:");
3130 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3131 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3132 fprintf (dump_file, " %d", (int) i);
3133 fprintf (dump_file, "\n");
3134 fprintf (dump_file, "\n");
3137 valid_mult_list[data_index] = valid_mult;
3140 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3141 return false;
3143 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
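/* For instance, on targets whose addressing modes scale the index by
   2, 4 or 8 (as on typical x86 variants), the probing above leaves
   exactly those ratios valid; the precise set is target-specific.  */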
3146 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3147 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3148 variable is omitted. Compute the cost for a memory reference that accesses
3149 a memory location of mode MEM_MODE in address space AS.
3151 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3152 size of MEM_MODE / RATIO) is available. To make this determination, we
3153 look at the size of the increment to be made, which is given in CSTEP.
3154 CSTEP may be zero if the step is unknown.
3155 STMT_AFTER_INC is true iff the statement we're looking at is after the
3156 increment of the original biv.
3158 TODO -- there must be some better way. This all is quite crude. */
3160 typedef struct address_cost_data_s
3162 HOST_WIDE_INT min_offset, max_offset;
3163 unsigned costs[2][2][2][2];
3164 } *address_cost_data;
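/* The costs array is indexed as costs[sym_p][var_p][off_p][rat_p];
   e.g. costs[0][1][1][0] is the cost of an address of the shape
   var + offset, with no symbol and no scaled index.  */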
3167 static comp_cost
3168 get_address_cost (bool symbol_present, bool var_present,
3169 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3170 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3171 addr_space_t as, bool speed,
3172 bool stmt_after_inc, bool *may_autoinc)
3174 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3175 static vec<address_cost_data> address_cost_data_list;
3176 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3177 address_cost_data data;
3178 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3179 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3180 unsigned cost, acost, complexity;
3181 bool offset_p, ratio_p, autoinc;
3182 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3183 unsigned HOST_WIDE_INT mask;
3184 unsigned bits;
3186 if (data_index >= address_cost_data_list.length ())
3187 address_cost_data_list.safe_grow_cleared (data_index + 1);
3189 data = address_cost_data_list[data_index];
3190 if (!data)
3192 HOST_WIDE_INT i;
3193 HOST_WIDE_INT rat, off = 0;
3194 int old_cse_not_expected, width;
3195 unsigned sym_p, var_p, off_p, rat_p, add_c;
3196 rtx seq, addr, base;
3197 rtx reg0, reg1;
3199 data = (address_cost_data) xcalloc (1, sizeof (*data));
3201 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3203 width = GET_MODE_BITSIZE (address_mode) - 1;
3204 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3205 width = HOST_BITS_PER_WIDE_INT - 1;
3206 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3208 for (i = width; i >= 0; i--)
3210 off = -((unsigned HOST_WIDE_INT) 1 << i);
3211 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3212 if (memory_address_addr_space_p (mem_mode, addr, as))
3213 break;
3215 data->min_offset = (i == -1? 0 : off);
3217 for (i = width; i >= 0; i--)
3219 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3220 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3221 if (memory_address_addr_space_p (mem_mode, addr, as))
3222 break;
3224 if (i == -1)
3225 off = 0;
3226 data->max_offset = off;
3228 if (dump_file && (dump_flags & TDF_DETAILS))
3230 fprintf (dump_file, "get_address_cost:\n");
3231 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3232 GET_MODE_NAME (mem_mode),
3233 data->min_offset);
3234 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3235 GET_MODE_NAME (mem_mode),
3236 data->max_offset);
3239 rat = 1;
3240 for (i = 2; i <= MAX_RATIO; i++)
3241 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3243 rat = i;
3244 break;
3247 /* Compute the cost of various addressing modes. */
3248 acost = 0;
3249 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3250 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3252 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3253 || USE_STORE_PRE_DECREMENT (mem_mode))
3255 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3256 has_predec[mem_mode]
3257 = memory_address_addr_space_p (mem_mode, addr, as);
3259 if (USE_LOAD_POST_DECREMENT (mem_mode)
3260 || USE_STORE_POST_DECREMENT (mem_mode))
3262 addr = gen_rtx_POST_DEC (address_mode, reg0);
3263 has_postdec[mem_mode]
3264 = memory_address_addr_space_p (mem_mode, addr, as);
3266 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3267 || USE_STORE_PRE_INCREMENT (mem_mode))
3269 addr = gen_rtx_PRE_INC (address_mode, reg0);
3270 has_preinc[mem_mode]
3271 = memory_address_addr_space_p (mem_mode, addr, as);
3273 if (USE_LOAD_POST_INCREMENT (mem_mode)
3274 || USE_STORE_POST_INCREMENT (mem_mode))
3276 addr = gen_rtx_POST_INC (address_mode, reg0);
3277 has_postinc[mem_mode]
3278 = memory_address_addr_space_p (mem_mode, addr, as);
3280 for (i = 0; i < 16; i++)
3282 sym_p = i & 1;
3283 var_p = (i >> 1) & 1;
3284 off_p = (i >> 2) & 1;
3285 rat_p = (i >> 3) & 1;
3287 addr = reg0;
3288 if (rat_p)
3289 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3290 gen_int_mode (rat, address_mode));
3292 if (var_p)
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3295 if (sym_p)
3297 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3298 /* ??? We can run into trouble with some backends by presenting
3299 it with symbols which haven't been properly passed through
3300 targetm.encode_section_info. By setting the local bit, we
3301 enhance the probability of things working. */
3302 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3304 if (off_p)
3305 base = gen_rtx_fmt_e (CONST, address_mode,
3306 gen_rtx_fmt_ee
3307 (PLUS, address_mode, base,
3308 gen_int_mode (off, address_mode)));
3310 else if (off_p)
3311 base = gen_int_mode (off, address_mode);
3312 else
3313 base = NULL_RTX;
3315 if (base)
3316 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3318 start_sequence ();
3319 /* To avoid splitting addressing modes, pretend that no cse will
3320 follow. */
3321 old_cse_not_expected = cse_not_expected;
3322 cse_not_expected = true;
3323 addr = memory_address_addr_space (mem_mode, addr, as);
3324 cse_not_expected = old_cse_not_expected;
3325 seq = get_insns ();
3326 end_sequence ();
3328 acost = seq_cost (seq, speed);
3329 acost += address_cost (addr, mem_mode, as, speed);
3331 if (!acost)
3332 acost = 1;
3333 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3336 /* On some targets, it is quite expensive to load a symbol into a register,
3337 which makes addresses that contain symbols look much more expensive.
3338 However, the symbol will have to be loaded in any case before the
3339 loop (and quite likely we have it in a register already), so it does not
3340 make much sense to penalize them too heavily. So make some final
3341 tweaks for the SYMBOL_PRESENT modes:
3343 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3344 var is cheaper, use this mode with small penalty.
3345 If VAR_PRESENT is true, try whether the mode with
3346 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3347 if this is the case, use it. */
3348 add_c = add_cost (speed, address_mode);
3349 for (i = 0; i < 8; i++)
3351 var_p = i & 1;
3352 off_p = (i >> 1) & 1;
3353 rat_p = (i >> 2) & 1;
3355 acost = data->costs[0][1][off_p][rat_p] + 1;
3356 if (var_p)
3357 acost += add_c;
3359 if (acost < data->costs[1][var_p][off_p][rat_p])
3360 data->costs[1][var_p][off_p][rat_p] = acost;
3363 if (dump_file && (dump_flags & TDF_DETAILS))
3365 fprintf (dump_file, "Address costs:\n");
3367 for (i = 0; i < 16; i++)
3369 sym_p = i & 1;
3370 var_p = (i >> 1) & 1;
3371 off_p = (i >> 2) & 1;
3372 rat_p = (i >> 3) & 1;
3374 fprintf (dump_file, " ");
3375 if (sym_p)
3376 fprintf (dump_file, "sym + ");
3377 if (var_p)
3378 fprintf (dump_file, "var + ");
3379 if (off_p)
3380 fprintf (dump_file, "cst + ");
3381 if (rat_p)
3382 fprintf (dump_file, "rat * ");
3384 acost = data->costs[sym_p][var_p][off_p][rat_p];
3385 fprintf (dump_file, "index costs %d\n", acost);
3387 if (has_predec[mem_mode] || has_postdec[mem_mode]
3388 || has_preinc[mem_mode] || has_postinc[mem_mode])
3389 fprintf (dump_file, " May include autoinc/dec\n");
3390 fprintf (dump_file, "\n");
3393 address_cost_data_list[data_index] = data;
3396 bits = GET_MODE_BITSIZE (address_mode);
3397 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3398 offset &= mask;
3399 if ((offset >> (bits - 1) & 1))
3400 offset |= ~mask;
3401 s_offset = offset;
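/* Example of the sign extension above: with a 32-bit address mode,
   offset == 0xfffffff0 has bit 31 set, so it becomes s_offset == -16
   before being compared against the min_offset/max_offset range.  */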
3403 autoinc = false;
3404 msize = GET_MODE_SIZE (mem_mode);
3405 autoinc_offset = offset;
3406 if (stmt_after_inc)
3407 autoinc_offset += ratio * cstep;
3408 if (symbol_present || var_present || ratio != 1)
3409 autoinc = false;
3410 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3411 && msize == cstep)
3412 || (has_postdec[mem_mode] && autoinc_offset == 0
3413 && msize == -cstep)
3414 || (has_preinc[mem_mode] && autoinc_offset == msize
3415 && msize == cstep)
3416 || (has_predec[mem_mode] && autoinc_offset == -msize
3417 && msize == -cstep))
3418 autoinc = true;
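/* E.g. a post-increment is usable when the access size equals the step
   (msize == cstep) and no symbol, invariant part, scaling or leftover
   offset is involved, so the iv increment can be folded into the memory
   access itself.  */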
3420 cost = 0;
3421 offset_p = (s_offset != 0
3422 && data->min_offset <= s_offset
3423 && s_offset <= data->max_offset);
3424 ratio_p = (ratio != 1
3425 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3427 if (ratio != 1 && !ratio_p)
3428 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3430 if (s_offset && !offset_p && !symbol_present)
3431 cost += add_cost (speed, address_mode);
3433 if (may_autoinc)
3434 *may_autoinc = autoinc;
3435 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3436 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3437 return new_cost (cost + acost, complexity);
3440 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3441 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3442 calculating the operands of EXPR. Returns true if successful, and returns
3443 the cost in COST. */
3445 static bool
3446 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3447 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3449 comp_cost res;
3450 tree op1 = TREE_OPERAND (expr, 1);
3451 tree cst = TREE_OPERAND (mult, 1);
3452 tree multop = TREE_OPERAND (mult, 0);
3453 int m = exact_log2 (int_cst_value (cst));
3454 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3455 int sa_cost;
3457 if (!(m >= 0 && m < maxm))
3458 return false;
3460 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3461 ? shiftadd_cost (speed, mode, m)
3462 : (mult == op1
3463 ? shiftsub1_cost (speed, mode, m)
3464 : shiftsub0_cost (speed, mode, m)));
3465 res = new_cost (sa_cost, 0);
3466 res = add_costs (res, mult == op1 ? cost0 : cost1);
3468 STRIP_NOPS (multop);
3469 if (!is_gimple_val (multop))
3470 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3472 *cost = res;
3474 return true;
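/* E.g. for expr == a + b * 8, cst == 8 gives m == 3, so the cost is that
   of a single shift-and-add rather than a full multiply; for a - b * 8
   the shiftsub variants are used instead.  */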
3477 /* Estimates cost of forcing expression EXPR into a variable. */
3479 static comp_cost
3480 force_expr_to_var_cost (tree expr, bool speed)
3482 static bool costs_initialized = false;
3483 static unsigned integer_cost [2];
3484 static unsigned symbol_cost [2];
3485 static unsigned address_cost [2];
3486 tree op0, op1;
3487 comp_cost cost0, cost1, cost;
3488 enum machine_mode mode;
3490 if (!costs_initialized)
3492 tree type = build_pointer_type (integer_type_node);
3493 tree var, addr;
3494 rtx x;
3495 int i;
3497 var = create_tmp_var_raw (integer_type_node, "test_var");
3498 TREE_STATIC (var) = 1;
3499 x = produce_memory_decl_rtl (var, NULL);
3500 SET_DECL_RTL (var, x);
3502 addr = build1 (ADDR_EXPR, type, var);
3505 for (i = 0; i < 2; i++)
3507 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3508 2000), i);
3510 symbol_cost[i] = computation_cost (addr, i) + 1;
3512 address_cost[i]
3513 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3514 if (dump_file && (dump_flags & TDF_DETAILS))
3516 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3517 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3518 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3519 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3520 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3521 fprintf (dump_file, "\n");
3525 costs_initialized = true;
3528 STRIP_NOPS (expr);
3530 if (SSA_VAR_P (expr))
3531 return no_cost;
3533 if (is_gimple_min_invariant (expr))
3535 if (TREE_CODE (expr) == INTEGER_CST)
3536 return new_cost (integer_cost [speed], 0);
3538 if (TREE_CODE (expr) == ADDR_EXPR)
3540 tree obj = TREE_OPERAND (expr, 0);
3542 if (TREE_CODE (obj) == VAR_DECL
3543 || TREE_CODE (obj) == PARM_DECL
3544 || TREE_CODE (obj) == RESULT_DECL)
3545 return new_cost (symbol_cost [speed], 0);
3548 return new_cost (address_cost [speed], 0);
3551 switch (TREE_CODE (expr))
3553 case POINTER_PLUS_EXPR:
3554 case PLUS_EXPR:
3555 case MINUS_EXPR:
3556 case MULT_EXPR:
3557 op0 = TREE_OPERAND (expr, 0);
3558 op1 = TREE_OPERAND (expr, 1);
3559 STRIP_NOPS (op0);
3560 STRIP_NOPS (op1);
3562 if (is_gimple_val (op0))
3563 cost0 = no_cost;
3564 else
3565 cost0 = force_expr_to_var_cost (op0, speed);
3567 if (is_gimple_val (op1))
3568 cost1 = no_cost;
3569 else
3570 cost1 = force_expr_to_var_cost (op1, speed);
3572 break;
3574 case NEGATE_EXPR:
3575 op0 = TREE_OPERAND (expr, 0);
3576 STRIP_NOPS (op0);
3577 op1 = NULL_TREE;
3579 if (is_gimple_val (op0))
3580 cost0 = no_cost;
3581 else
3582 cost0 = force_expr_to_var_cost (op0, speed);
3584 cost1 = no_cost;
3585 break;
3587 default:
3589 /* Just an arbitrary value, FIXME. */
3590 return new_cost (target_spill_cost[speed], 0);
3593 mode = TYPE_MODE (TREE_TYPE (expr));
3594 switch (TREE_CODE (expr))
3596 case POINTER_PLUS_EXPR:
3597 case PLUS_EXPR:
3598 case MINUS_EXPR:
3599 case NEGATE_EXPR:
3600 cost = new_cost (add_cost (speed, mode), 0);
3601 if (TREE_CODE (expr) != NEGATE_EXPR)
3603 tree mult = NULL_TREE;
3604 comp_cost sa_cost;
3605 if (TREE_CODE (op1) == MULT_EXPR)
3606 mult = op1;
3607 else if (TREE_CODE (op0) == MULT_EXPR)
3608 mult = op0;
3610 if (mult != NULL_TREE
3611 && cst_fits_shwi_p (TREE_OPERAND (mult, 1))
3612 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3613 speed, &sa_cost))
3614 return sa_cost;
3616 break;
3618 case MULT_EXPR:
3619 if (cst_fits_shwi_p (op0))
3620 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3621 mode, speed), 0);
3622 else if (cst_fits_shwi_p (op1))
3623 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3624 mode, speed), 0);
3625 else
3626 return new_cost (target_spill_cost [speed], 0);
3627 break;
3629 default:
3630 gcc_unreachable ();
3633 cost = add_costs (cost, cost0);
3634 cost = add_costs (cost, cost1);
3636 /* Bound the cost by target_spill_cost. The parts of complicated
3637 computations often are either loop invariant or at least can
3638 be shared between several iv uses, so letting this grow without
3639 limits would not give reasonable results. */
3640 if (cost.cost > (int) target_spill_cost [speed])
3641 cost.cost = target_spill_cost [speed];
3643 return cost;
3646 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3647 invariants the computation depends on. */
3649 static comp_cost
3650 force_var_cost (struct ivopts_data *data,
3651 tree expr, bitmap *depends_on)
3653 if (depends_on)
3655 fd_ivopts_data = data;
3656 walk_tree (&expr, find_depends, depends_on, NULL);
3659 return force_expr_to_var_cost (expr, data->speed);
3662 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3663 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3664 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3665 invariants the computation depends on. */
3667 static comp_cost
3668 split_address_cost (struct ivopts_data *data,
3669 tree addr, bool *symbol_present, bool *var_present,
3670 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3672 tree core;
3673 HOST_WIDE_INT bitsize;
3674 HOST_WIDE_INT bitpos;
3675 tree toffset;
3676 enum machine_mode mode;
3677 int unsignedp, volatilep;
3679 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3680 &unsignedp, &volatilep, false);
3682 if (toffset != 0
3683 || bitpos % BITS_PER_UNIT != 0
3684 || TREE_CODE (core) != VAR_DECL)
3686 *symbol_present = false;
3687 *var_present = true;
3688 fd_ivopts_data = data;
3689 walk_tree (&addr, find_depends, depends_on, NULL);
3690 return new_cost (target_spill_cost[data->speed], 0);
3693 *offset += bitpos / BITS_PER_UNIT;
3694 if (TREE_STATIC (core)
3695 || DECL_EXTERNAL (core))
3697 *symbol_present = true;
3698 *var_present = false;
3699 return no_cost;
3702 *symbol_present = false;
3703 *var_present = true;
3704 return no_cost;
3707 /* Estimates cost of expressing difference of addresses E1 - E2 as
3708 var + symbol + offset. The value of offset is added to OFFSET,
3709 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3710 part is missing. DEPENDS_ON is a set of the invariants the computation
3711 depends on. */
3713 static comp_cost
3714 ptr_difference_cost (struct ivopts_data *data,
3715 tree e1, tree e2, bool *symbol_present, bool *var_present,
3716 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3718 HOST_WIDE_INT diff = 0;
3719 aff_tree aff_e1, aff_e2;
3720 tree type;
3722 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3724 if (ptr_difference_const (e1, e2, &diff))
3726 *offset += diff;
3727 *symbol_present = false;
3728 *var_present = false;
3729 return no_cost;
3732 if (integer_zerop (e2))
3733 return split_address_cost (data, TREE_OPERAND (e1, 0),
3734 symbol_present, var_present, offset, depends_on);
3736 *symbol_present = false;
3737 *var_present = true;
3739 type = signed_type_for (TREE_TYPE (e1));
3740 tree_to_aff_combination (e1, type, &aff_e1);
3741 tree_to_aff_combination (e2, type, &aff_e2);
3742 aff_combination_scale (&aff_e2, -1);
3743 aff_combination_add (&aff_e1, &aff_e2);
3745 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3748 /* Estimates cost of expressing difference E1 - E2 as
3749 var + symbol + offset. The value of offset is added to OFFSET,
3750 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3751 part is missing. DEPENDS_ON is a set of the invariants the computation
3752 depends on. */
3754 static comp_cost
3755 difference_cost (struct ivopts_data *data,
3756 tree e1, tree e2, bool *symbol_present, bool *var_present,
3757 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3759 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3760 unsigned HOST_WIDE_INT off1, off2;
3761 aff_tree aff_e1, aff_e2;
3762 tree type;
3764 e1 = strip_offset (e1, &off1);
3765 e2 = strip_offset (e2, &off2);
3766 *offset += off1 - off2;
3768 STRIP_NOPS (e1);
3769 STRIP_NOPS (e2);
3771 if (TREE_CODE (e1) == ADDR_EXPR)
3772 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3773 offset, depends_on);
3774 *symbol_present = false;
3776 if (operand_equal_p (e1, e2, 0))
3778 *var_present = false;
3779 return no_cost;
3782 *var_present = true;
3784 if (integer_zerop (e2))
3785 return force_var_cost (data, e1, depends_on);
3787 if (integer_zerop (e1))
3789 comp_cost cost = force_var_cost (data, e2, depends_on);
3790 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3791 return cost;
3794 type = signed_type_for (TREE_TYPE (e1));
3795 tree_to_aff_combination (e1, type, &aff_e1);
3796 tree_to_aff_combination (e2, type, &aff_e2);
3797 aff_combination_scale (&aff_e2, -1);
3798 aff_combination_add (&aff_e1, &aff_e2);
3800 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3803 /* Returns true if AFF1 and AFF2 are identical. */
3805 static bool
3806 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3808 unsigned i;
3810 if (aff1->n != aff2->n)
3811 return false;
3813 for (i = 0; i < aff1->n; i++)
3815 if (aff1->elts[i].coef != aff2->elts[i].coef)
3816 return false;
3818 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3819 return false;
3821 return true;
3824 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3826 static int
3827 get_expr_id (struct ivopts_data *data, tree expr)
3829 struct iv_inv_expr_ent ent;
3830 struct iv_inv_expr_ent **slot;
3832 ent.expr = expr;
3833 ent.hash = iterative_hash_expr (expr, 0);
3834 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3835 if (*slot)
3836 return (*slot)->id;
3838 *slot = XNEW (struct iv_inv_expr_ent);
3839 (*slot)->expr = expr;
3840 (*slot)->hash = ent.hash;
3841 (*slot)->id = data->inv_expr_id++;
3842 return (*slot)->id;
3845 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3846 requires a new compiler generated temporary. Returns -1 otherwise.
3847 ADDRESS_P is a flag indicating if the expression is for address
3848 computation. */
3850 static int
3851 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3852 tree cbase, HOST_WIDE_INT ratio,
3853 bool address_p)
3855 aff_tree ubase_aff, cbase_aff;
3856 tree expr, ub, cb;
3858 STRIP_NOPS (ubase);
3859 STRIP_NOPS (cbase);
3860 ub = ubase;
3861 cb = cbase;
3863 if ((TREE_CODE (ubase) == INTEGER_CST)
3864 && (TREE_CODE (cbase) == INTEGER_CST))
3865 return -1;
3867 /* Strips the constant part. */
3868 if (TREE_CODE (ubase) == PLUS_EXPR
3869 || TREE_CODE (ubase) == MINUS_EXPR
3870 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3872 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3873 ubase = TREE_OPERAND (ubase, 0);
3876 /* Strips the constant part. */
3877 if (TREE_CODE (cbase) == PLUS_EXPR
3878 || TREE_CODE (cbase) == MINUS_EXPR
3879 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3881 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3882 cbase = TREE_OPERAND (cbase, 0);
3885 if (address_p)
3887 if (((TREE_CODE (ubase) == SSA_NAME)
3888 || (TREE_CODE (ubase) == ADDR_EXPR
3889 && is_gimple_min_invariant (ubase)))
3890 && (TREE_CODE (cbase) == INTEGER_CST))
3891 return -1;
3893 if (((TREE_CODE (cbase) == SSA_NAME)
3894 || (TREE_CODE (cbase) == ADDR_EXPR
3895 && is_gimple_min_invariant (cbase)))
3896 && (TREE_CODE (ubase) == INTEGER_CST))
3897 return -1;
3900 if (ratio == 1)
3902 if (operand_equal_p (ubase, cbase, 0))
3903 return -1;
3905 if (TREE_CODE (ubase) == ADDR_EXPR
3906 && TREE_CODE (cbase) == ADDR_EXPR)
3908 tree usym, csym;
3910 usym = TREE_OPERAND (ubase, 0);
3911 csym = TREE_OPERAND (cbase, 0);
3912 if (TREE_CODE (usym) == ARRAY_REF)
3914 tree ind = TREE_OPERAND (usym, 1);
3915 if (TREE_CODE (ind) == INTEGER_CST
3916 && tree_fits_shwi_p (ind)
3917 && tree_to_shwi (ind) == 0)
3918 usym = TREE_OPERAND (usym, 0);
3920 if (TREE_CODE (csym) == ARRAY_REF)
3922 tree ind = TREE_OPERAND (csym, 1);
3923 if (TREE_CODE (ind) == INTEGER_CST
3924 && tree_fits_shwi_p (ind)
3925 && tree_to_shwi (ind) == 0)
3926 csym = TREE_OPERAND (csym, 0);
3928 if (operand_equal_p (usym, csym, 0))
3929 return -1;
3931 /* Now do a more complex comparison. */
3932 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3933 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3934 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3935 return -1;
3938 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3939 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3941 aff_combination_scale (&cbase_aff, -1 * ratio);
3942 aff_combination_add (&ubase_aff, &cbase_aff);
3943 expr = aff_combination_to_tree (&ubase_aff);
3944 return get_expr_id (data, expr);
3949 /* Determines the cost of the computation by which USE is expressed
3950 from induction variable CAND. If ADDRESS_P is true, we just need
3951 to create an address from it, otherwise we want to get it into
3952 a register. A set of invariants we depend on is stored in
3953 DEPENDS_ON. AT is the statement at which the value is computed.
3954 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3955 addressing is likely. */
3957 static comp_cost
3958 get_computation_cost_at (struct ivopts_data *data,
3959 struct iv_use *use, struct iv_cand *cand,
3960 bool address_p, bitmap *depends_on, gimple at,
3961 bool *can_autoinc,
3962 int *inv_expr_id)
3964 tree ubase = use->iv->base, ustep = use->iv->step;
3965 tree cbase, cstep;
3966 tree utype = TREE_TYPE (ubase), ctype;
3967 unsigned HOST_WIDE_INT cstepi, offset = 0;
3968 HOST_WIDE_INT ratio, aratio;
3969 bool var_present, symbol_present, stmt_is_after_inc;
3970 comp_cost cost;
3971 widest_int rat;
3972 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3973 enum machine_mode mem_mode = (address_p
3974 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3975 : VOIDmode);
3977 *depends_on = NULL;
3979 /* Only consider real candidates. */
3980 if (!cand->iv)
3981 return infinite_cost;
3983 cbase = cand->iv->base;
3984 cstep = cand->iv->step;
3985 ctype = TREE_TYPE (cbase);
3987 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3989 /* We do not have enough precision to express the values of USE. */
3990 return infinite_cost;
3993 if (address_p
3994 || (use->iv->base_object
3995 && cand->iv->base_object
3996 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3997 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3999 /* Do not try to express address of an object with computation based
4000 on address of a different object. This may cause problems in rtl
4001 level alias analysis (which does not expect this to happen,
4002 as it is illegal in C), and would be unlikely to be useful
4003 anyway. */
4004 if (use->iv->base_object
4005 && cand->iv->base_object
4006 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4007 return infinite_cost;
4010 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4012 /* TODO -- add direct handling of this case. */
4013 goto fallback;
4016 /* CSTEPI is removed from the offset in case the statement is after the
4017 increment. If the step is not constant, we use zero instead.
4018 This is a bit imprecise (there is the extra addition), but
4019 redundancy elimination is likely to transform the code so that
4020 it uses the value of the variable before the increment anyway,
4021 so this is not too unrealistic. */
4022 if (cst_fits_shwi_p (cstep))
4023 cstepi = int_cst_value (cstep);
4024 else
4025 cstepi = 0;
4027 if (!constant_multiple_of (ustep, cstep, &rat))
4028 return infinite_cost;
4030 if (wi::fits_shwi_p (rat))
4031 ratio = rat.to_shwi ();
4032 else
4033 return infinite_cost;
4035 STRIP_NOPS (cbase);
4036 ctype = TREE_TYPE (cbase);
4038 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4040 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4041 or ratio == 1, it is better to handle this like
4043 ubase - ratio * cbase + ratio * var
4045 (this also holds in the case ratio == -1, TODO). */
4047 if (cst_fits_shwi_p (cbase))
4049 offset = - ratio * int_cst_value (cbase);
4050 cost = difference_cost (data,
4051 ubase, build_int_cst (utype, 0),
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4056 else if (ratio == 1)
4058 tree real_cbase = cbase;
4060 /* Check to see if any adjustment is needed. */
4061 if (cstepi == 0 && stmt_is_after_inc)
4063 aff_tree real_cbase_aff;
4064 aff_tree cstep_aff;
4066 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4067 &real_cbase_aff);
4068 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4070 aff_combination_add (&real_cbase_aff, &cstep_aff);
4071 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4074 cost = difference_cost (data,
4075 ubase, real_cbase,
4076 &symbol_present, &var_present, &offset,
4077 depends_on);
4078 cost.cost /= avg_loop_niter (data->current_loop);
4080 else if (address_p
4081 && !POINTER_TYPE_P (ctype)
4082 && multiplier_allowed_in_address_p
4083 (ratio, mem_mode,
4084 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4086 cbase
4087 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4088 cost = difference_cost (data,
4089 ubase, cbase,
4090 &symbol_present, &var_present, &offset,
4091 depends_on);
4092 cost.cost /= avg_loop_niter (data->current_loop);
4094 else
4096 cost = force_var_cost (data, cbase, depends_on);
4097 cost = add_costs (cost,
4098 difference_cost (data,
4099 ubase, build_int_cst (utype, 0),
4100 &symbol_present, &var_present,
4101 &offset, depends_on));
4102 cost.cost /= avg_loop_niter (data->current_loop);
4103 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4106 if (inv_expr_id)
4108 *inv_expr_id =
4109 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4110 /* Clear DEPENDS_ON. */
4111 if (*inv_expr_id != -1 && depends_on && *depends_on)
4112 bitmap_clear (*depends_on);
4115 /* If we are after the increment, the value of the candidate is higher by
4116 one iteration. */
4117 if (stmt_is_after_inc)
4118 offset -= ratio * cstepi;
4120 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4121 (symbol/var1/const parts may be omitted). If we are looking for an
4122 address, find the cost of addressing this. */
4123 if (address_p)
4124 return add_costs (cost,
4125 get_address_cost (symbol_present, var_present,
4126 offset, ratio, cstepi,
4127 mem_mode,
4128 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4129 speed, stmt_is_after_inc,
4130 can_autoinc));
4132 /* Otherwise estimate the costs for computing the expression. */
4133 if (!symbol_present && !var_present && !offset)
4135 if (ratio != 1)
4136 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4137 return cost;
4140 /* Symbol + offset should be compile-time computable, so consider that they
4141 are added once to the variable, if present. */
4142 if (var_present && (symbol_present || offset))
4143 cost.cost += adjust_setup_cost (data,
4144 add_cost (speed, TYPE_MODE (ctype)));
4146 /* Having offset does not affect runtime cost in case it is added to
4147 symbol, but it increases complexity. */
4148 if (offset)
4149 cost.complexity++;
4151 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4153 aratio = ratio > 0 ? ratio : -ratio;
4154 if (aratio != 1)
4155 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4156 return cost;
4158 fallback:
4159 if (can_autoinc)
4160 *can_autoinc = false;
4163 /* Just get the expression, expand it and measure the cost. */
4164 tree comp = get_computation_at (data->current_loop, use, cand, at);
4166 if (!comp)
4167 return infinite_cost;
4169 if (address_p)
4170 comp = build_simple_mem_ref (comp);
4172 return new_cost (computation_cost (comp, speed), 0);
4176 /* Determines the cost of the computation by which USE is expressed
4177 from induction variable CAND. If ADDRESS_P is true, we just need
4178 to create an address from it, otherwise we want to get it into
4179 a register. A set of invariants we depend on is stored in
4180 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4181 autoinc addressing is likely. */
4183 static comp_cost
4184 get_computation_cost (struct ivopts_data *data,
4185 struct iv_use *use, struct iv_cand *cand,
4186 bool address_p, bitmap *depends_on,
4187 bool *can_autoinc, int *inv_expr_id)
4189 return get_computation_cost_at (data,
4190 use, cand, address_p, depends_on, use->stmt,
4191 can_autoinc, inv_expr_id);
4194 /* Determines cost of basing replacement of USE on CAND in a generic
4195 expression. */
4197 static bool
4198 determine_use_iv_cost_generic (struct ivopts_data *data,
4199 struct iv_use *use, struct iv_cand *cand)
4201 bitmap depends_on;
4202 comp_cost cost;
4203 int inv_expr_id = -1;
4205 /* The simple case first -- if we need to express the value of the preserved
4206 original biv, the cost is 0. This also prevents us from counting the
4207 cost of increment twice -- once at this use and once in the cost of
4208 the candidate. */
4209 if (cand->pos == IP_ORIGINAL
4210 && cand->incremented_at == use->stmt)
4212 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4213 ERROR_MARK, -1);
4214 return true;
4217 cost = get_computation_cost (data, use, cand, false, &depends_on,
4218 NULL, &inv_expr_id);
4220 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4221 inv_expr_id);
4223 return !infinite_cost_p (cost);
4226 /* Determines cost of basing replacement of USE on CAND in an address. */
4228 static bool
4229 determine_use_iv_cost_address (struct ivopts_data *data,
4230 struct iv_use *use, struct iv_cand *cand)
4232 bitmap depends_on;
4233 bool can_autoinc;
4234 int inv_expr_id = -1;
4235 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4236 &can_autoinc, &inv_expr_id);
4238 if (cand->ainc_use == use)
4240 if (can_autoinc)
4241 cost.cost -= cand->cost_step;
4242 /* If we generated the candidate solely for exploiting autoincrement
4243 opportunities, and it turns out it can't be used, set the cost to
4244 infinity to make sure we ignore it. */
4245 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4246 cost = infinite_cost;
4248 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4249 inv_expr_id);
4251 return !infinite_cost_p (cost);
4254 /* Computes value of candidate CAND at position AT in iteration NITER, and
4255 stores it to VAL. */
4257 static void
4258 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4259 aff_tree *val)
4261 aff_tree step, delta, nit;
4262 struct iv *iv = cand->iv;
4263 tree type = TREE_TYPE (iv->base);
4264 tree steptype = type;
4265 if (POINTER_TYPE_P (type))
4266 steptype = sizetype;
4268 tree_to_aff_combination (iv->step, steptype, &step);
4269 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4270 aff_combination_convert (&nit, steptype);
4271 aff_combination_mult (&nit, &step, &delta);
4272 if (stmt_after_increment (loop, cand, at))
4273 aff_combination_add (&delta, &step);
4275 tree_to_aff_combination (iv->base, type, val);
4276 aff_combination_add (val, &delta);
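/* Worked example, added for exposition: for an iv with base 16 and
   step 4, the value after NITER = 10 iterations is 16 + 10 * 4 = 56;
   if AT lies after the increment, one extra step is added, giving
   16 + (10 + 1) * 4 = 60.  */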
4279 /* Returns the period of induction variable IV. */
4281 static tree
4282 iv_period (struct iv *iv)
4284 tree step = iv->step, period, type;
4285 tree pow2div;
4287 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4289 type = unsigned_type_for (TREE_TYPE (step));
4290 /* Period of the iv is lcm (step, type_range)/step - 1,
4291 i.e., N*type_range/step - 1. Since the type range is a power
4292 of two, N == step >> num_of_ending_zeros_binary (step),
4293 so the final result is
4295 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4298 pow2div = num_ending_zeros (step);
4300 period = build_low_bits_mask (type,
4301 (TYPE_PRECISION (type)
4302 - tree_to_uhwi (pow2div)));
4304 return period;
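/* Worked example, added for exposition: for a 32-bit unsigned type and
   step 12 (binary 1100), num_ending_zeros is 2, so the mask built above
   has 32 - 2 = 30 low bits set and the period is 0x3fffffff: the iv
   assumes 2^30 distinct values before it wraps.  */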
4307 /* Returns the comparison operator used when eliminating the iv USE. */
4309 static enum tree_code
4310 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4312 struct loop *loop = data->current_loop;
4313 basic_block ex_bb;
4314 edge exit;
4316 ex_bb = gimple_bb (use->stmt);
4317 exit = EDGE_SUCC (ex_bb, 0);
4318 if (flow_bb_inside_loop_p (loop, exit->dest))
4319 exit = EDGE_SUCC (ex_bb, 1);
4321 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
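/* For example (added): if the loop is left when the GIMPLE_COND is
   true, the replacement exit test has the shape "i == bound", hence
   EQ_EXPR; if the loop is left on the false edge, the loop keeps
   running while "i != bound" holds, hence NE_EXPR.  */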
4324 static tree
4325 strip_wrap_conserving_type_conversions (tree exp)
4327 while (tree_ssa_useless_type_conversion (exp)
4328 && (nowrap_type_p (TREE_TYPE (exp))
4329 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4330 exp = TREE_OPERAND (exp, 0);
4331 return exp;
4334 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4335 only check for an exact match. */
4337 static bool
4338 expr_equal_p (tree e, tree what)
4340 gimple stmt;
4341 enum tree_code code;
4343 e = strip_wrap_conserving_type_conversions (e);
4344 what = strip_wrap_conserving_type_conversions (what);
4346 code = TREE_CODE (what);
4347 if (TREE_TYPE (e) != TREE_TYPE (what))
4348 return false;
4350 if (operand_equal_p (e, what, 0))
4351 return true;
4353 if (TREE_CODE (e) != SSA_NAME)
4354 return false;
4356 stmt = SSA_NAME_DEF_STMT (e);
4357 if (gimple_code (stmt) != GIMPLE_ASSIGN
4358 || gimple_assign_rhs_code (stmt) != code)
4359 return false;
4361 switch (get_gimple_rhs_class (code))
4363 case GIMPLE_BINARY_RHS:
4364 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4365 return false;
4366 /* Fallthru. */
4368 case GIMPLE_UNARY_RHS:
4369 case GIMPLE_SINGLE_RHS:
4370 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4371 default:
4372 return false;
4376 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4377 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4378 calculation is performed in a non-wrapping type.
4380 TODO: More generally, we could test for the situation that
4381 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4382 This would require knowing the sign of OFFSET.
4384 Also, we only look for the first addition in the computation of BASE.
4385 More complex analysis would be better, but introducing it just for
4386 this optimization seems like overkill. */
4388 static bool
4389 difference_cannot_overflow_p (tree base, tree offset)
4391 enum tree_code code;
4392 tree e1, e2;
4394 if (!nowrap_type_p (TREE_TYPE (base)))
4395 return false;
4397 base = expand_simple_operations (base);
4399 if (TREE_CODE (base) == SSA_NAME)
4401 gimple stmt = SSA_NAME_DEF_STMT (base);
4403 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4404 return false;
4406 code = gimple_assign_rhs_code (stmt);
4407 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4408 return false;
4410 e1 = gimple_assign_rhs1 (stmt);
4411 e2 = gimple_assign_rhs2 (stmt);
4413 else
4415 code = TREE_CODE (base);
4416 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4417 return false;
4418 e1 = TREE_OPERAND (base, 0);
4419 e2 = TREE_OPERAND (base, 1);
4422 /* TODO: deeper inspection may be necessary to prove the equality. */
4423 switch (code)
4425 case PLUS_EXPR:
4426 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4427 case POINTER_PLUS_EXPR:
4428 return expr_equal_p (e2, offset);
4430 default:
4431 return false;
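/* Illustrative case (added): for

     base = p_0 + n;   /@ POINTER_PLUS_EXPR in a non-wrapping type @/

   difference_cannot_overflow_p (base, n) returns true, since
   base - n merely recovers p_0 and so cannot wrap around.  */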
4435 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4436 comparison with CAND. NITER describes the number of iterations of
4437 the loop. If successful, the comparison in COMP_P is altered accordingly.
4439 We aim to handle the following situation:
4441 sometype *base, *p;
4442 int a, b, i;
4444 i = a;
4445 p = p_0 = base + a;
4447 do
4448 {
4449 bla (*p);
4450 p++;
4451 i++;
4452 }
4453 while (i < b);
4455 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4456 We aim to optimize this to
4458 p = p_0 = base + a;
4459 do
4460 {
4461 bla (*p);
4462 p++;
4463 }
4464 while (p < p_0 - a + b);
4466 This preserves correctness, since the pointer arithmetic does not
4467 overflow. More precisely:
4469 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4470 overflow in computing it or the values of p.
4471 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4472 overflow. To prove this, we use the fact that p_0 = base + a. */
4474 static bool
4475 iv_elimination_compare_lt (struct ivopts_data *data,
4476 struct iv_cand *cand, enum tree_code *comp_p,
4477 struct tree_niter_desc *niter)
4479 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4480 struct affine_tree_combination nit, tmpa, tmpb;
4481 enum tree_code comp;
4482 HOST_WIDE_INT step;
4484 /* We need to know that the candidate induction variable does not overflow.
4485 While more complex analysis may be used to prove this, for now just
4486 check that the variable appears in the original program and that it
4487 is computed in a type that guarantees no overflows. */
4488 cand_type = TREE_TYPE (cand->iv->base);
4489 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4490 return false;
4492 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4493 the calculation of the BOUND could overflow, making the comparison
4494 invalid. */
4495 if (!data->loop_single_exit_p)
4496 return false;
4498 /* We need to be able to decide whether candidate is increasing or decreasing
4499 in order to choose the right comparison operator. */
4500 if (!cst_fits_shwi_p (cand->iv->step))
4501 return false;
4502 step = int_cst_value (cand->iv->step);
4504 /* Check that the number of iterations matches the expected pattern:
4505 a + 1 > b ? 0 : b - a - 1. */
4506 mbz = niter->may_be_zero;
4507 if (TREE_CODE (mbz) == GT_EXPR)
4509 /* Handle a + 1 > b. */
4510 tree op0 = TREE_OPERAND (mbz, 0);
4511 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4513 a = TREE_OPERAND (op0, 0);
4514 b = TREE_OPERAND (mbz, 1);
4516 else
4517 return false;
4519 else if (TREE_CODE (mbz) == LT_EXPR)
4521 tree op1 = TREE_OPERAND (mbz, 1);
4523 /* Handle b < a + 1. */
4524 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4526 a = TREE_OPERAND (op1, 0);
4527 b = TREE_OPERAND (mbz, 0);
4529 else
4530 return false;
4532 else
4533 return false;
4535 /* Expected number of iterations is B - A - 1. Check that it matches
4536 the actual number, i.e., that B - A - NITER = 1. */
4537 tree_to_aff_combination (niter->niter, nit_type, &nit);
4538 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4539 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4540 aff_combination_scale (&nit, -1);
4541 aff_combination_scale (&tmpa, -1);
4542 aff_combination_add (&tmpb, &tmpa);
4543 aff_combination_add (&tmpb, &nit);
4544 if (tmpb.n != 0 || tmpb.offset != 1)
4545 return false;
4547 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4548 overflow. */
4549 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4550 cand->iv->step,
4551 fold_convert (TREE_TYPE (cand->iv->step), a));
4552 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4553 return false;
4555 /* Determine the new comparison operator. */
4556 comp = step < 0 ? GT_EXPR : LT_EXPR;
4557 if (*comp_p == NE_EXPR)
4558 *comp_p = comp;
4559 else if (*comp_p == EQ_EXPR)
4560 *comp_p = invert_tree_comparison (comp, false);
4561 else
4562 gcc_unreachable ();
4564 return true;
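/* Concrete instantiation (added, illustrative numbers): with a = 2,
   b = 6 and step 1, MAY_BE_ZERO is "2 + 1 > 6" and NITER is
   6 - 2 - 1 = 3, so B - A - NITER = 1 as checked above; since the
   step is positive and the original test used NE_EXPR, the exit
   condition is rewritten to use LT_EXPR.  */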
4567 /* Check whether it is possible to express the condition in USE by comparison
4568 of candidate CAND. If so, store the value compared with to BOUND, and the
4569 comparison operator to COMP. */
4571 static bool
4572 may_eliminate_iv (struct ivopts_data *data,
4573 struct iv_use *use, struct iv_cand *cand, tree *bound,
4574 enum tree_code *comp)
4576 basic_block ex_bb;
4577 edge exit;
4578 tree period;
4579 struct loop *loop = data->current_loop;
4580 aff_tree bnd;
4581 struct tree_niter_desc *desc = NULL;
4583 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4584 return false;
4586 /* For now works only for exits that dominate the loop latch.
4587 TODO: extend to other conditions inside loop body. */
4588 ex_bb = gimple_bb (use->stmt);
4589 if (use->stmt != last_stmt (ex_bb)
4590 || gimple_code (use->stmt) != GIMPLE_COND
4591 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4592 return false;
4594 exit = EDGE_SUCC (ex_bb, 0);
4595 if (flow_bb_inside_loop_p (loop, exit->dest))
4596 exit = EDGE_SUCC (ex_bb, 1);
4597 if (flow_bb_inside_loop_p (loop, exit->dest))
4598 return false;
4600 desc = niter_for_exit (data, exit);
4601 if (!desc)
4602 return false;
4604 /* Determine whether we can use the variable to test the exit condition.
4605 This is the case iff the period of the induction variable is greater
4606 than the number of iterations for which the exit condition is true. */
4607 period = iv_period (cand->iv);
4609 /* If the number of iterations is constant, compare against it directly. */
4610 if (TREE_CODE (desc->niter) == INTEGER_CST)
4612 /* See cand_value_at. */
4613 if (stmt_after_increment (loop, cand, use->stmt))
4615 if (!tree_int_cst_lt (desc->niter, period))
4616 return false;
4618 else
4620 if (tree_int_cst_lt (period, desc->niter))
4621 return false;
4625 /* If not, and if this is the only possible exit of the loop, see whether
4626 we can get a conservative estimate on the number of iterations of the
4627 entire loop and compare against that instead. */
4628 else
4630 widest_int period_value, max_niter;
4632 max_niter = desc->max;
4633 if (stmt_after_increment (loop, cand, use->stmt))
4634 max_niter += 1;
4635 period_value = wi::to_widest (period);
4636 if (wi::gtu_p (max_niter, period_value))
4638 /* See if we can take advantage of inferred loop bound information. */
4639 if (data->loop_single_exit_p)
4641 if (!max_loop_iterations (loop, &max_niter))
4642 return false;
4643 /* The loop bound is already adjusted by adding 1. */
4644 if (wi::gtu_p (max_niter, period_value))
4645 return false;
4647 else
4648 return false;
4652 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4654 *bound = aff_combination_to_tree (&bnd);
4655 *comp = iv_elimination_compare (data, use);
4657 /* It is unlikely that computing the number of iterations using division
4658 would be more profitable than keeping the original induction variable. */
4659 if (expression_expensive_p (*bound))
4660 return false;
4662 /* Sometimes, it is possible to handle the situation that the number of
4663 iterations may be zero unless additional assumptions hold, by using <
4664 instead of != in the exit condition.
4666 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4667 base the exit condition on it. However, that is often too
4668 expensive. */
4669 if (!integer_zerop (desc->may_be_zero))
4670 return iv_elimination_compare_lt (data, cand, comp, desc);
4672 return true;
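/* Typical elimination (sketch, added for exposition): in

     for (i = 0; i != n; i++)
       *p++ = 0;

   the exit test i != n can be based on the candidate p instead,
   comparing it against the value of p in the last iteration as
   computed by cand_value_at; the period check above guarantees that
   p cannot wrap around before that bound is reached.  */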
4675 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4676 be copied, if it is used in the loop body and DATA->body_includes_call. */
4678 static int
4679 parm_decl_cost (struct ivopts_data *data, tree bound)
4681 tree sbound = bound;
4682 STRIP_NOPS (sbound);
4684 if (TREE_CODE (sbound) == SSA_NAME
4685 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4686 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4687 && data->body_includes_call)
4688 return COSTS_N_INSNS (1);
4690 return 0;
4693 /* Determines cost of basing replacement of USE on CAND in a condition. */
4695 static bool
4696 determine_use_iv_cost_condition (struct ivopts_data *data,
4697 struct iv_use *use, struct iv_cand *cand)
4699 tree bound = NULL_TREE;
4700 struct iv *cmp_iv;
4701 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4702 comp_cost elim_cost, express_cost, cost, bound_cost;
4703 bool ok;
4704 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4705 tree *control_var, *bound_cst;
4706 enum tree_code comp = ERROR_MARK;
4708 /* Only consider real candidates. */
4709 if (!cand->iv)
4711 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4712 ERROR_MARK, -1);
4713 return false;
4716 /* Try iv elimination. */
4717 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4719 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4720 if (elim_cost.cost == 0)
4721 elim_cost.cost = parm_decl_cost (data, bound);
4722 else if (TREE_CODE (bound) == INTEGER_CST)
4723 elim_cost.cost = 0;
4724 /* If we replace a loop condition 'i < n' with 'p < base + n',
4725 depends_on_elim will have 'base' and 'n' set, which implies
4726 that both 'base' and 'n' will be live during the loop. More likely,
4727 'base + n' will be loop invariant, resulting in only one live value
4728 during the loop. So in that case we clear depends_on_elim and set
4729 elim_inv_expr_id instead. */
4730 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4732 elim_inv_expr_id = get_expr_id (data, bound);
4733 bitmap_clear (depends_on_elim);
4735 /* The bound is a loop invariant, so it will be computed only
4736 once. */
4737 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4739 else
4740 elim_cost = infinite_cost;
4742 /* Try expressing the original giv. If it is compared with an invariant,
4743 note that we cannot get rid of it. */
4744 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4745 NULL, &cmp_iv);
4746 gcc_assert (ok);
4748 /* When the condition is a comparison of the candidate IV against
4749 zero, prefer this IV.
4751 TODO: The constant that we're subtracting from the cost should
4752 be target-dependent. This information should be added to the
4753 target costs for each backend. */
4754 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4755 && integer_zerop (*bound_cst)
4756 && (operand_equal_p (*control_var, cand->var_after, 0)
4757 || operand_equal_p (*control_var, cand->var_before, 0)))
4758 elim_cost.cost -= 1;
4760 express_cost = get_computation_cost (data, use, cand, false,
4761 &depends_on_express, NULL,
4762 &express_inv_expr_id);
4763 fd_ivopts_data = data;
4764 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4766 /* Count the cost of the original bound as well. */
4767 bound_cost = force_var_cost (data, *bound_cst, NULL);
4768 if (bound_cost.cost == 0)
4769 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4770 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4771 bound_cost.cost = 0;
4772 express_cost.cost += bound_cost.cost;
4774 /* Choose the better approach, preferring the eliminated IV. */
4775 if (compare_costs (elim_cost, express_cost) <= 0)
4777 cost = elim_cost;
4778 depends_on = depends_on_elim;
4779 depends_on_elim = NULL;
4780 inv_expr_id = elim_inv_expr_id;
4782 else
4784 cost = express_cost;
4785 depends_on = depends_on_express;
4786 depends_on_express = NULL;
4787 bound = NULL_TREE;
4788 comp = ERROR_MARK;
4789 inv_expr_id = express_inv_expr_id;
4792 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4794 if (depends_on_elim)
4795 BITMAP_FREE (depends_on_elim);
4796 if (depends_on_express)
4797 BITMAP_FREE (depends_on_express);
4799 return !infinite_cost_p (cost);
4802 /* Determines cost of basing replacement of USE on CAND. Returns false
4803 if USE cannot be based on CAND. */
4805 static bool
4806 determine_use_iv_cost (struct ivopts_data *data,
4807 struct iv_use *use, struct iv_cand *cand)
4809 switch (use->type)
4811 case USE_NONLINEAR_EXPR:
4812 return determine_use_iv_cost_generic (data, use, cand);
4814 case USE_ADDRESS:
4815 return determine_use_iv_cost_address (data, use, cand);
4817 case USE_COMPARE:
4818 return determine_use_iv_cost_condition (data, use, cand);
4820 default:
4821 gcc_unreachable ();
4825 /* Return true if get_computation_cost indicates that autoincrement is
4826 a possibility for the pair of USE and CAND, false otherwise. */
4828 static bool
4829 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4830 struct iv_cand *cand)
4832 bitmap depends_on;
4833 bool can_autoinc;
4834 comp_cost cost;
4836 if (use->type != USE_ADDRESS)
4837 return false;
4839 cost = get_computation_cost (data, use, cand, true, &depends_on,
4840 &can_autoinc, NULL);
4842 BITMAP_FREE (depends_on);
4844 return !infinite_cost_p (cost) && can_autoinc;
4847 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4848 use that allows autoincrement, and set their AINC_USE if possible. */
4850 static void
4851 set_autoinc_for_original_candidates (struct ivopts_data *data)
4853 unsigned i, j;
4855 for (i = 0; i < n_iv_cands (data); i++)
4857 struct iv_cand *cand = iv_cand (data, i);
4858 struct iv_use *closest_before = NULL;
4859 struct iv_use *closest_after = NULL;
4860 if (cand->pos != IP_ORIGINAL)
4861 continue;
4863 for (j = 0; j < n_iv_uses (data); j++)
4865 struct iv_use *use = iv_use (data, j);
4866 unsigned uid = gimple_uid (use->stmt);
4868 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4869 continue;
4871 if (uid < gimple_uid (cand->incremented_at)
4872 && (closest_before == NULL
4873 || uid > gimple_uid (closest_before->stmt)))
4874 closest_before = use;
4876 if (uid > gimple_uid (cand->incremented_at)
4877 && (closest_after == NULL
4878 || uid < gimple_uid (closest_after->stmt)))
4879 closest_after = use;
4882 if (closest_before != NULL
4883 && autoinc_possible_for_pair (data, closest_before, cand))
4884 cand->ainc_use = closest_before;
4885 else if (closest_after != NULL
4886 && autoinc_possible_for_pair (data, closest_after, cand))
4887 cand->ainc_use = closest_after;
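/* Illustrative pattern (added): in a block of the shape

     ... = *p;
     p = p + 1;

   the load is the closest use before the increment of the IP_ORIGINAL
   candidate p, so AINC_USE is set and the pair can later be costed as
   a post-increment access on targets that support such addressing.  */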
4891 /* Finds the candidates for the induction variables. */
4893 static void
4894 find_iv_candidates (struct ivopts_data *data)
4896 /* Add commonly used ivs. */
4897 add_standard_iv_candidates (data);
4899 /* Add old induction variables. */
4900 add_old_ivs_candidates (data);
4902 /* Add induction variables derived from uses. */
4903 add_derived_ivs_candidates (data);
4905 set_autoinc_for_original_candidates (data);
4907 /* Record the important candidates. */
4908 record_important_candidates (data);
4911 /* Determines costs of basing the use of the iv on an iv candidate. */
4913 static void
4914 determine_use_iv_costs (struct ivopts_data *data)
4916 unsigned i, j;
4917 struct iv_use *use;
4918 struct iv_cand *cand;
4919 bitmap to_clear = BITMAP_ALLOC (NULL);
4921 alloc_use_cost_map (data);
4923 for (i = 0; i < n_iv_uses (data); i++)
4925 use = iv_use (data, i);
4927 if (data->consider_all_candidates)
4929 for (j = 0; j < n_iv_cands (data); j++)
4931 cand = iv_cand (data, j);
4932 determine_use_iv_cost (data, use, cand);
4935 else
4937 bitmap_iterator bi;
4939 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4941 cand = iv_cand (data, j);
4942 if (!determine_use_iv_cost (data, use, cand))
4943 bitmap_set_bit (to_clear, j);
4946 /* Remove the candidates for which the cost is infinite from
4947 the list of related candidates. */
4948 bitmap_and_compl_into (use->related_cands, to_clear);
4949 bitmap_clear (to_clear);
4953 BITMAP_FREE (to_clear);
4955 if (dump_file && (dump_flags & TDF_DETAILS))
4957 fprintf (dump_file, "Use-candidate costs:\n");
4959 for (i = 0; i < n_iv_uses (data); i++)
4961 use = iv_use (data, i);
4963 fprintf (dump_file, "Use %d:\n", i);
4964 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4965 for (j = 0; j < use->n_map_members; j++)
4967 if (!use->cost_map[j].cand
4968 || infinite_cost_p (use->cost_map[j].cost))
4969 continue;
4971 fprintf (dump_file, " %d\t%d\t%d\t",
4972 use->cost_map[j].cand->id,
4973 use->cost_map[j].cost.cost,
4974 use->cost_map[j].cost.complexity);
4975 if (use->cost_map[j].depends_on)
4976 bitmap_print (dump_file,
4977 use->cost_map[j].depends_on, "","");
4978 if (use->cost_map[j].inv_expr_id != -1)
4979 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4980 fprintf (dump_file, "\n");
4983 fprintf (dump_file, "\n");
4985 fprintf (dump_file, "\n");
4989 /* Determines cost of the candidate CAND. */
4991 static void
4992 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4994 comp_cost cost_base;
4995 unsigned cost, cost_step;
4996 tree base;
4998 if (!cand->iv)
5000 cand->cost = 0;
5001 return;
5004 /* There are two costs associated with the candidate -- its increment
5005 and its initialization. The second is almost negligible for any loop
5006 that rolls enough, so we give it only very little weight. */
5008 base = cand->iv->base;
5009 cost_base = force_var_cost (data, base, NULL);
5010 /* It will be exceptional that the iv register happens to be initialized with
5011 the proper value at no cost. In general, there will at least be a regcopy
5012 or a const set. */
5013 if (cost_base.cost == 0)
5014 cost_base.cost = COSTS_N_INSNS (1);
5015 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5017 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5019 /* Prefer the original ivs unless we may gain something by replacing them.
5020 The reason is to make debugging simpler, so this is not relevant for
5021 artificial ivs created by other optimization passes. */
5022 if (cand->pos != IP_ORIGINAL
5023 || !SSA_NAME_VAR (cand->var_before)
5024 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5025 cost++;
5027 /* Prefer not to insert statements into latch unless there are some
5028 already (so that we do not create unnecessary jumps). */
5029 if (cand->pos == IP_END
5030 && empty_block_p (ip_end_pos (data->current_loop)))
5031 cost++;
5033 cand->cost = cost;
5034 cand->cost_step = cost_step;
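/* Worked example (added, illustrative costs): for a fresh candidate
   with base 0 and step 1, cost_base is forced to COSTS_N_INSNS (1)
   (a constant set), so

     cand->cost = cost_step + adjust_setup_cost (data, COSTS_N_INSNS (1)) + 1,

   the final + 1 being the penalty for not preserving an original iv.  */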
5037 /* Determines costs of computation of the candidates. */
5039 static void
5040 determine_iv_costs (struct ivopts_data *data)
5042 unsigned i;
5044 if (dump_file && (dump_flags & TDF_DETAILS))
5046 fprintf (dump_file, "Candidate costs:\n");
5047 fprintf (dump_file, " cand\tcost\n");
5050 for (i = 0; i < n_iv_cands (data); i++)
5052 struct iv_cand *cand = iv_cand (data, i);
5054 determine_iv_cost (data, cand);
5056 if (dump_file && (dump_flags & TDF_DETAILS))
5057 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5060 if (dump_file && (dump_flags & TDF_DETAILS))
5061 fprintf (dump_file, "\n");
5064 /* Calculates the cost of having SIZE induction variables. */
5066 static unsigned
5067 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5069 /* We add size to the cost, so that we prefer eliminating ivs
5070 if possible. */
5071 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5072 data->body_includes_call);
5075 /* For each size of the induction variable set determine the penalty. */
5077 static void
5078 determine_set_costs (struct ivopts_data *data)
5080 unsigned j, n;
5081 gimple phi;
5082 gimple_stmt_iterator psi;
5083 tree op;
5084 struct loop *loop = data->current_loop;
5085 bitmap_iterator bi;
5087 if (dump_file && (dump_flags & TDF_DETAILS))
5089 fprintf (dump_file, "Global costs:\n");
5090 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5091 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5092 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5093 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5096 n = 0;
5097 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5099 phi = gsi_stmt (psi);
5100 op = PHI_RESULT (phi);
5102 if (virtual_operand_p (op))
5103 continue;
5105 if (get_iv (data, op))
5106 continue;
5108 n++;
5111 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5113 struct version_info *info = ver_info (data, j);
5115 if (info->inv_id && info->has_nonlin_use)
5116 n++;
5119 data->regs_used = n;
5120 if (dump_file && (dump_flags & TDF_DETAILS))
5121 fprintf (dump_file, " regs_used %d\n", n);
5123 if (dump_file && (dump_flags & TDF_DETAILS))
5125 fprintf (dump_file, " cost for size:\n");
5126 fprintf (dump_file, " ivs\tcost\n");
5127 for (j = 0; j <= 2 * target_avail_regs; j++)
5128 fprintf (dump_file, " %d\t%d\n", j,
5129 ivopts_global_cost_for_size (data, j));
5130 fprintf (dump_file, "\n");
5134 /* Returns true if A is a cheaper cost pair than B. */
5136 static bool
5137 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5139 int cmp;
5141 if (!a)
5142 return false;
5144 if (!b)
5145 return true;
5147 cmp = compare_costs (a->cost, b->cost);
5148 if (cmp < 0)
5149 return true;
5151 if (cmp > 0)
5152 return false;
5154 /* In case the costs are the same, prefer the cheaper candidate. */
5155 if (a->cand->cost < b->cand->cost)
5156 return true;
5158 return false;
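/* Tie-breaking example (added): two cost pairs with the same use cost,
   say 4, but candidate costs 5 and 7, compare so that the pair whose
   candidate is itself cheaper to maintain wins.  */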
5162 /* Returns the candidate by which USE is expressed in IVS. */
5164 static struct cost_pair *
5165 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5167 return ivs->cand_for_use[use->id];
5170 /* Computes the cost field of IVS structure. */
5172 static void
5173 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5175 comp_cost cost = ivs->cand_use_cost;
5177 cost.cost += ivs->cand_cost;
5179 cost.cost += ivopts_global_cost_for_size (data,
5180 ivs->n_regs + ivs->num_used_inv_expr);
5182 ivs->cost = cost;
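/* Cost composition on made-up numbers (added): with cand_use_cost = 8,
   cand_cost = 5, n_regs = 3 and one used invariant expression, the
   total becomes

     8 + 5 + ivopts_global_cost_for_size (data, 3 + 1),

   i.e. the per-use costs, the per-candidate costs, and the register
   pressure estimate for everything live across the loop.  */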
5185 /* Remove the invariants in set INVS from set IVS. */
5187 static void
5188 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5190 bitmap_iterator bi;
5191 unsigned iid;
5193 if (!invs)
5194 return;
5196 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5198 ivs->n_invariant_uses[iid]--;
5199 if (ivs->n_invariant_uses[iid] == 0)
5200 ivs->n_regs--;
5204 /* Set USE not to be expressed by any candidate in IVS. */
5206 static void
5207 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5208 struct iv_use *use)
5210 unsigned uid = use->id, cid;
5211 struct cost_pair *cp;
5213 cp = ivs->cand_for_use[uid];
5214 if (!cp)
5215 return;
5216 cid = cp->cand->id;
5218 ivs->bad_uses++;
5219 ivs->cand_for_use[uid] = NULL;
5220 ivs->n_cand_uses[cid]--;
5222 if (ivs->n_cand_uses[cid] == 0)
5224 bitmap_clear_bit (ivs->cands, cid);
5225 /* Do not count the pseudocandidates. */
5226 if (cp->cand->iv)
5227 ivs->n_regs--;
5228 ivs->n_cands--;
5229 ivs->cand_cost -= cp->cand->cost;
5231 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5234 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5236 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5238 if (cp->inv_expr_id != -1)
5240 ivs->used_inv_expr[cp->inv_expr_id]--;
5241 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5242 ivs->num_used_inv_expr--;
5244 iv_ca_recount_cost (data, ivs);
5247 /* Add invariants in set INVS to set IVS. */
5249 static void
5250 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5252 bitmap_iterator bi;
5253 unsigned iid;
5255 if (!invs)
5256 return;
5258 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5260 ivs->n_invariant_uses[iid]++;
5261 if (ivs->n_invariant_uses[iid] == 1)
5262 ivs->n_regs++;
5266 /* Set cost pair for USE in set IVS to CP. */
5268 static void
5269 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5270 struct iv_use *use, struct cost_pair *cp)
5272 unsigned uid = use->id, cid;
5274 if (ivs->cand_for_use[uid] == cp)
5275 return;
5277 if (ivs->cand_for_use[uid])
5278 iv_ca_set_no_cp (data, ivs, use);
5280 if (cp)
5282 cid = cp->cand->id;
5284 ivs->bad_uses--;
5285 ivs->cand_for_use[uid] = cp;
5286 ivs->n_cand_uses[cid]++;
5287 if (ivs->n_cand_uses[cid] == 1)
5289 bitmap_set_bit (ivs->cands, cid);
5290 /* Do not count the pseudocandidates. */
5291 if (cp->cand->iv)
5292 ivs->n_regs++;
5293 ivs->n_cands++;
5294 ivs->cand_cost += cp->cand->cost;
5296 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5299 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5300 iv_ca_set_add_invariants (ivs, cp->depends_on);
5302 if (cp->inv_expr_id != -1)
5304 ivs->used_inv_expr[cp->inv_expr_id]++;
5305 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5306 ivs->num_used_inv_expr++;
5308 iv_ca_recount_cost (data, ivs);
5312 /* Extend set IVS by expressing USE by some of the candidates in it
5313 if possible. All important candidates will be considered
5314 if IMPORTANT_CANDIDATES is true. */
5316 static void
5317 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5318 struct iv_use *use, bool important_candidates)
5320 struct cost_pair *best_cp = NULL, *cp;
5321 bitmap_iterator bi;
5322 bitmap cands;
5323 unsigned i;
5325 gcc_assert (ivs->upto >= use->id);
5327 if (ivs->upto == use->id)
5329 ivs->upto++;
5330 ivs->bad_uses++;
5333 cands = (important_candidates ? data->important_candidates : ivs->cands);
5334 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5336 struct iv_cand *cand = iv_cand (data, i);
5338 cp = get_use_iv_cost (data, use, cand);
5340 if (cheaper_cost_pair (cp, best_cp))
5341 best_cp = cp;
5344 iv_ca_set_cp (data, ivs, use, best_cp);
5347 /* Get cost for assignment IVS. */
5349 static comp_cost
5350 iv_ca_cost (struct iv_ca *ivs)
5352 /* This was a conditional expression but it triggered a bug in
5353 Sun C 5.5. */
5354 if (ivs->bad_uses)
5355 return infinite_cost;
5356 else
5357 return ivs->cost;
5360 /* Returns true if all dependences of CP are among invariants in IVS. */
5362 static bool
5363 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5365 unsigned i;
5366 bitmap_iterator bi;
5368 if (!cp->depends_on)
5369 return true;
5371 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5373 if (ivs->n_invariant_uses[i] == 0)
5374 return false;
5377 return true;
5380 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
5381 chains it before NEXT_CHANGE. */
5383 static struct iv_ca_delta *
5384 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5385 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5387 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5389 change->use = use;
5390 change->old_cp = old_cp;
5391 change->new_cp = new_cp;
5392 change->next_change = next_change;
5394 return change;
5397 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5398 are rewritten. */
5400 static struct iv_ca_delta *
5401 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5403 struct iv_ca_delta *last;
5405 if (!l2)
5406 return l1;
5408 if (!l1)
5409 return l2;
5411 for (last = l1; last->next_change; last = last->next_change)
5412 continue;
5413 last->next_change = l2;
5415 return l1;
5418 /* Reverse the list of changes DELTA, forming the inverse to it. */
5420 static struct iv_ca_delta *
5421 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5423 struct iv_ca_delta *act, *next, *prev = NULL;
5424 struct cost_pair *tmp;
5426 for (act = delta; act; act = next)
5428 next = act->next_change;
5429 act->next_change = prev;
5430 prev = act;
5432 tmp = act->old_cp;
5433 act->old_cp = act->new_cp;
5434 act->new_cp = tmp;
5437 return prev;
5440 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5441 reverted instead. */
5443 static void
5444 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5445 struct iv_ca_delta *delta, bool forward)
5447 struct cost_pair *from, *to;
5448 struct iv_ca_delta *act;
5450 if (!forward)
5451 delta = iv_ca_delta_reverse (delta);
5453 for (act = delta; act; act = act->next_change)
5455 from = act->old_cp;
5456 to = act->new_cp;
5457 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5458 iv_ca_set_cp (data, ivs, act->use, to);
5461 if (!forward)
5462 iv_ca_delta_reverse (delta);
5465 /* Returns true if CAND is used in IVS. */
5467 static bool
5468 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5470 return ivs->n_cand_uses[cand->id] > 0;
5473 /* Returns number of induction variable candidates in the set IVS. */
5475 static unsigned
5476 iv_ca_n_cands (struct iv_ca *ivs)
5478 return ivs->n_cands;
5481 /* Free the list of changes DELTA. */
5483 static void
5484 iv_ca_delta_free (struct iv_ca_delta **delta)
5486 struct iv_ca_delta *act, *next;
5488 for (act = *delta; act; act = next)
5490 next = act->next_change;
5491 free (act);
5494 *delta = NULL;
5497 /* Allocates new iv candidates assignment. */
5499 static struct iv_ca *
5500 iv_ca_new (struct ivopts_data *data)
5502 struct iv_ca *nw = XNEW (struct iv_ca);
5504 nw->upto = 0;
5505 nw->bad_uses = 0;
5506 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5507 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5508 nw->cands = BITMAP_ALLOC (NULL);
5509 nw->n_cands = 0;
5510 nw->n_regs = 0;
5511 nw->cand_use_cost = no_cost;
5512 nw->cand_cost = 0;
5513 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5514 nw->cost = no_cost;
5515 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5516 nw->num_used_inv_expr = 0;
5518 return nw;
5521 /* Free memory occupied by the set IVS. */
5523 static void
5524 iv_ca_free (struct iv_ca **ivs)
5526 free ((*ivs)->cand_for_use);
5527 free ((*ivs)->n_cand_uses);
5528 BITMAP_FREE ((*ivs)->cands);
5529 free ((*ivs)->n_invariant_uses);
5530 free ((*ivs)->used_inv_expr);
5531 free (*ivs);
5532 *ivs = NULL;
5535 /* Dumps IVS to FILE. */
5537 static void
5538 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5540 const char *pref = " invariants ";
5541 unsigned i;
5542 comp_cost cost = iv_ca_cost (ivs);
5544 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5545 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5546 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5547 bitmap_print (file, ivs->cands, " candidates: ","\n");
5549 for (i = 0; i < ivs->upto; i++)
5551 struct iv_use *use = iv_use (data, i);
5552 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5553 if (cp)
5554 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5555 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5556 else
5557 fprintf (file, " use:%d --> ??\n", use->id);
5560 for (i = 1; i <= data->max_inv_id; i++)
5561 if (ivs->n_invariant_uses[i])
5563 fprintf (file, "%s%d", pref, i);
5564 pref = ", ";
5566 fprintf (file, "\n\n");
5569 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5570 new set, and store differences in DELTA. Number of induction variables
5571 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5572 the function will try to find a solution with the minimal number of iv candidates. */
5574 static comp_cost
5575 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5576 struct iv_cand *cand, struct iv_ca_delta **delta,
5577 unsigned *n_ivs, bool min_ncand)
5579 unsigned i;
5580 comp_cost cost;
5581 struct iv_use *use;
5582 struct cost_pair *old_cp, *new_cp;
5584 *delta = NULL;
5585 for (i = 0; i < ivs->upto; i++)
5587 use = iv_use (data, i);
5588 old_cp = iv_ca_cand_for_use (ivs, use);
5590 if (old_cp
5591 && old_cp->cand == cand)
5592 continue;
5594 new_cp = get_use_iv_cost (data, use, cand);
5595 if (!new_cp)
5596 continue;
5598 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5599 continue;
5601 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5602 continue;
5604 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5607 iv_ca_delta_commit (data, ivs, *delta, true);
5608 cost = iv_ca_cost (ivs);
5609 if (n_ivs)
5610 *n_ivs = iv_ca_n_cands (ivs);
5611 iv_ca_delta_commit (data, ivs, *delta, false);
5613 return cost;
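/* Typical usage (sketch mirroring try_improve_iv_set below): evaluate
   the returned cost first and commit the recorded delta only if it is
   an improvement:

     acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
     if (compare_costs (acost, best_cost) < 0)
       ... remember act_delta and commit it later ...
     else
       iv_ca_delta_free (&act_delta);
*/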
5616 /* Try narrowing set IVS by removing CAND. Return the cost of
5617 the new set and store the differences in DELTA. */
5619 static comp_cost
5620 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5621 struct iv_cand *cand, struct iv_ca_delta **delta)
5623 unsigned i, ci;
5624 struct iv_use *use;
5625 struct cost_pair *old_cp, *new_cp, *cp;
5626 bitmap_iterator bi;
5627 struct iv_cand *cnd;
5628 comp_cost cost;
5630 *delta = NULL;
5631 for (i = 0; i < n_iv_uses (data); i++)
5633 use = iv_use (data, i);
5635 old_cp = iv_ca_cand_for_use (ivs, use);
5636 if (old_cp->cand != cand)
5637 continue;
5639 new_cp = NULL;
5641 if (data->consider_all_candidates)
5643 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5645 if (ci == cand->id)
5646 continue;
5648 cnd = iv_cand (data, ci);
5650 cp = get_use_iv_cost (data, use, cnd);
5651 if (!cp)
5652 continue;
5654 if (!iv_ca_has_deps (ivs, cp))
5655 continue;
5657 if (!cheaper_cost_pair (cp, new_cp))
5658 continue;
5660 new_cp = cp;
5663 else
5665 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5667 if (ci == cand->id)
5668 continue;
5670 cnd = iv_cand (data, ci);
5672 cp = get_use_iv_cost (data, use, cnd);
5673 if (!cp)
5674 continue;
5675 if (!iv_ca_has_deps (ivs, cp))
5676 continue;
5678 if (!cheaper_cost_pair (cp, new_cp))
5679 continue;
5681 new_cp = cp;
5685 if (!new_cp)
5687 iv_ca_delta_free (delta);
5688 return infinite_cost;
5691 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5694 iv_ca_delta_commit (data, ivs, *delta, true);
5695 cost = iv_ca_cost (ivs);
5696 iv_ca_delta_commit (data, ivs, *delta, false);
5698 return cost;
5701 /* Try optimizing the set of candidates IVS by removing candidates other
5702 than EXCEPT_CAND from it. Return the cost of the new set, and store
5703 differences in DELTA. */
5705 static comp_cost
5706 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5707 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5709 bitmap_iterator bi;
5710 struct iv_ca_delta *act_delta, *best_delta;
5711 unsigned i;
5712 comp_cost best_cost, acost;
5713 struct iv_cand *cand;
5715 best_delta = NULL;
5716 best_cost = iv_ca_cost (ivs);
5718 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5720 cand = iv_cand (data, i);
5722 if (cand == except_cand)
5723 continue;
5725 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5727 if (compare_costs (acost, best_cost) < 0)
5729 best_cost = acost;
5730 iv_ca_delta_free (&best_delta);
5731 best_delta = act_delta;
5733 else
5734 iv_ca_delta_free (&act_delta);
5737 if (!best_delta)
5739 *delta = NULL;
5740 return best_cost;
5743 /* Recurse to possibly remove other unnecessary ivs. */
5744 iv_ca_delta_commit (data, ivs, best_delta, true);
5745 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5746 iv_ca_delta_commit (data, ivs, best_delta, false);
5747 *delta = iv_ca_delta_join (best_delta, *delta);
5748 return best_cost;
5751 /* Tries to extend the set IVS in the best possible way in order
5752 to express USE. If ORIGINALP is true, prefer candidates from
5753 the original set of IVs, otherwise favor important candidates not
5754 based on any memory object. */
5756 static bool
5757 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5758 struct iv_use *use, bool originalp)
5760 comp_cost best_cost, act_cost;
5761 unsigned i;
5762 bitmap_iterator bi;
5763 struct iv_cand *cand;
5764 struct iv_ca_delta *best_delta = NULL, *act_delta;
5765 struct cost_pair *cp;
5767 iv_ca_add_use (data, ivs, use, false);
5768 best_cost = iv_ca_cost (ivs);
5770 cp = iv_ca_cand_for_use (ivs, use);
5771 if (!cp)
5773 ivs->upto--;
5774 ivs->bad_uses--;
5775 iv_ca_add_use (data, ivs, use, true);
5776 best_cost = iv_ca_cost (ivs);
5777 cp = iv_ca_cand_for_use (ivs, use);
5779 if (cp)
5781 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5782 iv_ca_set_no_cp (data, ivs, use);
5785 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5786 first try important candidates not based on any memory object. Only if
5787 this fails, try the specific ones. Rationale -- in loops with many
5788 variables the best choice often is to use just one generic biv. If we
5789 added here many ivs specific to the uses, the optimization algorithm later
5790 would be likely to get stuck in a local minimum, thus causing us to create
5791 too many ivs. The approach from few ivs to more seems more likely to be
5792 successful -- starting from few ivs, replacing an expensive use by a
5793 specific iv should always be a win. */
5794 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5796 cand = iv_cand (data, i);
5798 if (originalp && cand->pos != IP_ORIGINAL)
5799 continue;
5801 if (!originalp && cand->iv->base_object != NULL_TREE)
5802 continue;
5804 if (iv_ca_cand_used_p (ivs, cand))
5805 continue;
5807 cp = get_use_iv_cost (data, use, cand);
5808 if (!cp)
5809 continue;
5811 iv_ca_set_cp (data, ivs, use, cp);
5812 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5813 true);
5814 iv_ca_set_no_cp (data, ivs, use);
5815 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5817 if (compare_costs (act_cost, best_cost) < 0)
5819 best_cost = act_cost;
5821 iv_ca_delta_free (&best_delta);
5822 best_delta = act_delta;
5824 else
5825 iv_ca_delta_free (&act_delta);
5828 if (infinite_cost_p (best_cost))
5830 for (i = 0; i < use->n_map_members; i++)
5832 cp = use->cost_map + i;
5833 cand = cp->cand;
5834 if (!cand)
5835 continue;
5837 /* Already tried this. */
5838 if (cand->important)
5840 if (originalp && cand->pos == IP_ORIGINAL)
5841 continue;
5842 if (!originalp && cand->iv->base_object == NULL_TREE)
5843 continue;
5846 if (iv_ca_cand_used_p (ivs, cand))
5847 continue;
5849 act_delta = NULL;
5850 iv_ca_set_cp (data, ivs, use, cp);
5851 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5852 iv_ca_set_no_cp (data, ivs, use);
5853 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5854 cp, act_delta);
5856 if (compare_costs (act_cost, best_cost) < 0)
5858 best_cost = act_cost;
5860 if (best_delta)
5861 iv_ca_delta_free (&best_delta);
5862 best_delta = act_delta;
5864 else
5865 iv_ca_delta_free (&act_delta);
5869 iv_ca_delta_commit (data, ivs, best_delta, true);
5870 iv_ca_delta_free (&best_delta);
5872 return !infinite_cost_p (best_cost);
5875 /* Finds an initial assignment of candidates to uses. */
5877 static struct iv_ca *
5878 get_initial_solution (struct ivopts_data *data, bool originalp)
5880 struct iv_ca *ivs = iv_ca_new (data);
5881 unsigned i;
5883 for (i = 0; i < n_iv_uses (data); i++)
5884 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5886 iv_ca_free (&ivs);
5887 return NULL;
5890 return ivs;
5893 /* Tries to improve set of induction variables IVS. */
5895 static bool
5896 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5898 unsigned i, n_ivs;
5899 comp_cost acost, best_cost = iv_ca_cost (ivs);
5900 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5901 struct iv_cand *cand;
5903 /* Try extending the set of induction variables by one. */
5904 for (i = 0; i < n_iv_cands (data); i++)
5906 cand = iv_cand (data, i);
5908 if (iv_ca_cand_used_p (ivs, cand))
5909 continue;
5911 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5912 if (!act_delta)
5913 continue;
5915 /* If we successfully added the candidate and the set is small enough,
5916 try optimizing it by removing other candidates. */
5917 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5919 iv_ca_delta_commit (data, ivs, act_delta, true);
5920 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5921 iv_ca_delta_commit (data, ivs, act_delta, false);
5922 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5925 if (compare_costs (acost, best_cost) < 0)
5927 best_cost = acost;
5928 iv_ca_delta_free (&best_delta);
5929 best_delta = act_delta;
5931 else
5932 iv_ca_delta_free (&act_delta);
5935 if (!best_delta)
5937 /* Try removing the candidates from the set instead. */
5938 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5940 /* Nothing more we can do. */
5941 if (!best_delta)
5942 return false;
5945 iv_ca_delta_commit (data, ivs, best_delta, true);
5946 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5947 iv_ca_delta_free (&best_delta);
5948 return true;
5951 /* Attempts to find the optimal set of induction variables. We use a simple
5952 greedy heuristic -- we try to replace at most one candidate in the selected
5953 solution and remove the unused ivs while this improves the cost. */
5955 static struct iv_ca *
5956 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5958 struct iv_ca *set;
5960 /* Get the initial solution. */
5961 set = get_initial_solution (data, originalp);
5962 if (!set)
5964 if (dump_file && (dump_flags & TDF_DETAILS))
5965 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5966 return NULL;
5969 if (dump_file && (dump_flags & TDF_DETAILS))
5971 fprintf (dump_file, "Initial set of candidates:\n");
5972 iv_ca_dump (data, dump_file, set);
5975 while (try_improve_iv_set (data, set))
5977 if (dump_file && (dump_flags & TDF_DETAILS))
5979 fprintf (dump_file, "Improved to:\n");
5980 iv_ca_dump (data, dump_file, set);
5984 return set;
5987 static struct iv_ca *
5988 find_optimal_iv_set (struct ivopts_data *data)
5990 unsigned i;
5991 struct iv_ca *set, *origset;
5992 struct iv_use *use;
5993 comp_cost cost, origcost;
5995 /* Determine the cost based on a strategy that starts with original IVs,
5996 and try again using a strategy that prefers candidates not based
5997 on any IVs. */
5998 origset = find_optimal_iv_set_1 (data, true);
5999 set = find_optimal_iv_set_1 (data, false);
6001 if (!origset && !set)
6002 return NULL;
6004 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6005 cost = set ? iv_ca_cost (set) : infinite_cost;
6007 if (dump_file && (dump_flags & TDF_DETAILS))
6009 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6010 origcost.cost, origcost.complexity);
6011 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6012 cost.cost, cost.complexity);
6015 /* Choose the one with the best cost. */
6016 if (compare_costs (origcost, cost) <= 0)
6018 if (set)
6019 iv_ca_free (&set);
6020 set = origset;
6022 else if (origset)
6023 iv_ca_free (&origset);
6025 for (i = 0; i < n_iv_uses (data); i++)
6027 use = iv_use (data, i);
6028 use->selected = iv_ca_cand_for_use (set, use)->cand;
6031 return set;
6034 /* Creates a new induction variable corresponding to CAND. */
6036 static void
6037 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6039 gimple_stmt_iterator incr_pos;
6040 tree base;
6041 bool after = false;
6043 if (!cand->iv)
6044 return;
6046 switch (cand->pos)
6048 case IP_NORMAL:
6049 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6050 break;
6052 case IP_END:
6053 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6054 after = true;
6055 break;
6057 case IP_AFTER_USE:
6058 after = true;
6059 /* fall through */
6060 case IP_BEFORE_USE:
6061 incr_pos = gsi_for_stmt (cand->incremented_at);
6062 break;
6064 case IP_ORIGINAL:
6065 /* Mark that the iv is preserved. */
6066 name_info (data, cand->var_before)->preserve_biv = true;
6067 name_info (data, cand->var_after)->preserve_biv = true;
6069 /* Rewrite the increment so that it uses var_before directly. */
6070 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6071 return;
6074 gimple_add_tmp_var (cand->var_before);
6076 base = unshare_expr (cand->iv->base);
6078 create_iv (base, unshare_expr (cand->iv->step),
6079 cand->var_before, data->current_loop,
6080 &incr_pos, after, &cand->var_before, &cand->var_after);
6083 /* Creates new induction variables described in SET. */
6085 static void
6086 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6088 unsigned i;
6089 struct iv_cand *cand;
6090 bitmap_iterator bi;
6092 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6094 cand = iv_cand (data, i);
6095 create_new_iv (data, cand);
6098 if (dump_file && (dump_flags & TDF_DETAILS))
6100 fprintf (dump_file, "\nSelected IV set: \n");
6101 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6103 cand = iv_cand (data, i);
6104 dump_cand (dump_file, cand);
6106 fprintf (dump_file, "\n");
6110 /* Rewrites USE (definition of iv used in a nonlinear expression)
6111 using candidate CAND. */
6113 static void
6114 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6115 struct iv_use *use, struct iv_cand *cand)
6117 tree comp;
6118 tree op, tgt;
6119 gimple ass;
6120 gimple_stmt_iterator bsi;
6122 /* An important special case -- if we are asked to express the value of
6123 the original iv by itself, just exit; there is no need to
6124 introduce a new computation (that might also need casting the
6125 variable to unsigned and back). */
6126 if (cand->pos == IP_ORIGINAL
6127 && cand->incremented_at == use->stmt)
6129 enum tree_code stmt_code;
6131 gcc_assert (is_gimple_assign (use->stmt));
6132 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6134 /* Check whether we may leave the computation unchanged.
6135 This is the case only if it does not rely on other
6136 computations in the loop -- otherwise, the computation
6137 we rely upon may be removed in remove_unused_ivs,
6138 thus leading to ICE. */
6139 stmt_code = gimple_assign_rhs_code (use->stmt);
6140 if (stmt_code == PLUS_EXPR
6141 || stmt_code == MINUS_EXPR
6142 || stmt_code == POINTER_PLUS_EXPR)
6144 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6145 op = gimple_assign_rhs2 (use->stmt);
6146 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6147 op = gimple_assign_rhs1 (use->stmt);
6148 else
6149 op = NULL_TREE;
6151 else
6152 op = NULL_TREE;
6154 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6155 return;
6158 comp = get_computation (data->current_loop, use, cand);
6159 gcc_assert (comp != NULL_TREE);
6161 switch (gimple_code (use->stmt))
6163 case GIMPLE_PHI:
6164 tgt = PHI_RESULT (use->stmt);
6166 /* If we should keep the biv, do not replace it. */
6167 if (name_info (data, tgt)->preserve_biv)
6168 return;
6170 bsi = gsi_after_labels (gimple_bb (use->stmt));
6171 break;
6173 case GIMPLE_ASSIGN:
6174 tgt = gimple_assign_lhs (use->stmt);
6175 bsi = gsi_for_stmt (use->stmt);
6176 break;
6178 default:
6179 gcc_unreachable ();
6182 if (!valid_gimple_rhs_p (comp)
6183 || (gimple_code (use->stmt) != GIMPLE_PHI
6184 /* We can't allow re-allocating the stmt as it might be pointed
6185 to still. */
6186 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6187 >= gimple_num_ops (gsi_stmt (bsi)))))
6189 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6190 true, GSI_SAME_STMT);
6191 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6193 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6194 /* As this isn't a plain copy we have to reset alignment
6195 information. */
6196 if (SSA_NAME_PTR_INFO (comp))
6197 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6201 if (gimple_code (use->stmt) == GIMPLE_PHI)
6203 ass = gimple_build_assign (tgt, comp);
6204 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6206 bsi = gsi_for_stmt (use->stmt);
6207 remove_phi_node (&bsi, false);
6209 else
6211 gimple_assign_set_rhs_from_tree (&bsi, comp);
6212 use->stmt = gsi_stmt (bsi);
6216 /* Performs a peephole optimization to reorder the iv update statement with
6217 a mem ref to enable instruction combining in later phases. The mem ref uses
6218 the iv value before the update, so the reordering transformation requires
6219 adjustment of the offset. CAND is the selected IV_CAND.
6221 Example:
6223 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6224 iv2 = iv1 + 1;
6226 if (t < val) (1)
6227 goto L;
6228 goto Head;
6231 directly propagating t over to (1) will introduce an overlapping live range
6232 and thus increase register pressure. This peephole transforms it into:
6235 iv2 = iv1 + 1;
6236 t = MEM_REF (base, iv2, 8, 8);
6237 if (t < val)
6238 goto L;
6239 goto Head;
6242 static void
6243 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6245 tree var_after;
6246 gimple iv_update, stmt;
6247 basic_block bb;
6248 gimple_stmt_iterator gsi, gsi_iv;
6250 if (cand->pos != IP_NORMAL)
6251 return;
6253 var_after = cand->var_after;
6254 iv_update = SSA_NAME_DEF_STMT (var_after);
6256 bb = gimple_bb (iv_update);
6257 gsi = gsi_last_nondebug_bb (bb);
6258 stmt = gsi_stmt (gsi);
6260 /* Only handle a conditional statement for now. */
6261 if (gimple_code (stmt) != GIMPLE_COND)
6262 return;
6264 gsi_prev_nondebug (&gsi);
6265 stmt = gsi_stmt (gsi);
6266 if (stmt != iv_update)
6267 return;
6269 gsi_prev_nondebug (&gsi);
6270 if (gsi_end_p (gsi))
6271 return;
6273 stmt = gsi_stmt (gsi);
6274 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6275 return;
6277 if (stmt != use->stmt)
6278 return;
6280 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6281 return;
6283 if (dump_file && (dump_flags & TDF_DETAILS))
6285 fprintf (dump_file, "Reordering \n");
6286 print_gimple_stmt (dump_file, iv_update, 0, 0);
6287 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6288 fprintf (dump_file, "\n");
6291 gsi = gsi_for_stmt (use->stmt);
6292 gsi_iv = gsi_for_stmt (iv_update);
6293 gsi_move_before (&gsi_iv, &gsi);
6295 cand->pos = IP_BEFORE_USE;
6296 cand->incremented_at = use->stmt;
/* Rewrites USE (an address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
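  /* For example, if the candidate is based on the memory object p and aff
     works out to roughly "(unsigned) p_4 + 4 * i_2", the hint lets
     create_mem_ref rebuild the reference around p_4 rather than around an
     anonymous pointer.  (The names and shapes here are illustrative, not
     taken from any particular testcase.)  */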
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;
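
  /* A nonnull cp->value means the exit test can be expressed directly
     against a bound of the candidate; e.g. a test "if (i_7 != n_3)" may be
     rewritten as "if (ivtmp_9 < bound)", with the bound computed once in
     the loop preheader.  (The SSA names here are purely illustrative.)  */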
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
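	      /* Scan the selected candidates and pick the one whose IV
		 best resembles the removed one: a matching step counts
		 for 4, a matching mode for 2, and a constant base for 1,
		 so the criteria are effectively ordered by importance.  */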
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;
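
	      /* With several debug uses, bind the computation to a new
		 DEBUG_EXPR_DECL once, right after the def, and let all
		 the debug stmts refer to that temp instead.  */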
	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}
/* Returns true if the loop body BODY (with NUM_NODES blocks) includes any
   function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}