2007-05-27 H.J. Lu <hongjiu.lu@intel.com>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob1e5c5039ac028eb6351ba4f2f205c7a775200d82
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
105 struct iv
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
117 struct version_info
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
135 /* The candidate - cost pair. */
136 struct cost_pair
138 struct iv_cand *cand; /* The candidate. */
139 unsigned cost; /* The cost. */
140 bitmap depends_on; /* The list of invariants that have to be
141 preserved. */
142 tree value; /* For final value elimination, the expression for
143 the final value of the iv. For iv elimination,
144 the new bound to compare with. */
147 /* Use. */
148 struct iv_use
150 unsigned id; /* The id of the use. */
151 enum use_type type; /* Type of the use. */
152 struct iv *iv; /* The induction variable it is based on. */
153 tree stmt; /* Statement in that it occurs. */
154 tree *op_p; /* The place where it occurs. */
155 bitmap related_cands; /* The set of "related" iv candidates, plus the common
156 important ones. */
158 unsigned n_map_members; /* Number of candidates in the cost_map list. */
159 struct cost_pair *cost_map;
160 /* The costs wrto the iv candidates. */
162 struct iv_cand *selected;
163 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
174 /* The induction variable candidate. */
175 struct iv_cand
177 unsigned id; /* The number of the candidate. */
178 bool important; /* Whether this is an "important" candidate, i.e. such
179 that it should be considered by all uses. */
180 enum iv_position pos; /* Where it is computed. */
181 tree incremented_at; /* For original biv, the statement where it is
182 incremented. */
183 tree var_before; /* The variable used for it before increment. */
184 tree var_after; /* The variable used for it after increment. */
185 struct iv *iv; /* The value of the candidate. NULL for
186 "pseudocandidate" used to indicate the possibility
187 to replace the final value of an iv by direct
188 computation of the value. */
189 unsigned cost; /* Cost of the candidate. */
190 bitmap depends_on; /* The list of invariants that are used in step of the
191 biv. */
194 /* The data used by the induction variable optimizations. */
196 typedef struct iv_use *iv_use_p;
197 DEF_VEC_P(iv_use_p);
198 DEF_VEC_ALLOC_P(iv_use_p,heap);
200 typedef struct iv_cand *iv_cand_p;
201 DEF_VEC_P(iv_cand_p);
202 DEF_VEC_ALLOC_P(iv_cand_p,heap);
204 struct ivopts_data
206 /* The currently optimized loop. */
207 struct loop *current_loop;
209 /* Number of registers used in it. */
210 unsigned regs_used;
212 /* Numbers of iterations for all exits of the current loop. */
213 struct pointer_map_t *niters;
215 /* The size of version_info array allocated. */
216 unsigned version_info_size;
218 /* The array of information for the ssa names. */
219 struct version_info *version_info;
221 /* The bitmap of indices in version_info whose value was changed. */
222 bitmap relevant;
224 /* The maximum invariant id. */
225 unsigned max_inv_id;
227 /* The uses of induction variables. */
228 VEC(iv_use_p,heap) *iv_uses;
230 /* The candidates. */
231 VEC(iv_cand_p,heap) *iv_candidates;
233 /* A bitmap of important candidates. */
234 bitmap important_candidates;
236 /* Whether to consider just related and important candidates when replacing a
237 use. */
238 bool consider_all_candidates;
241 /* An assignment of iv candidates to uses. */
243 struct iv_ca
245 /* The number of uses covered by the assignment. */
246 unsigned upto;
248 /* Number of uses that cannot be expressed by the candidates in the set. */
249 unsigned bad_uses;
251 /* Candidate assigned to a use, together with the related costs. */
252 struct cost_pair **cand_for_use;
254 /* Number of times each candidate is used. */
255 unsigned *n_cand_uses;
257 /* The candidates used. */
258 bitmap cands;
260 /* The number of candidates in the set. */
261 unsigned n_cands;
263 /* Total number of registers needed. */
264 unsigned n_regs;
266 /* Total cost of expressing uses. */
267 unsigned cand_use_cost;
269 /* Total cost of candidates. */
270 unsigned cand_cost;
272 /* Number of times each invariant is used. */
273 unsigned *n_invariant_uses;
275 /* Total cost of the assignment. */
276 unsigned cost;
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
296 /* Bound on number of candidates below that all candidates are considered. */
298 #define CONSIDER_ALL_CANDIDATES_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301 /* If there are more iv occurrences, we just give up (it is quite unlikely that
302 optimizing such a loop would help, and it would take ages). */
304 #define MAX_CONSIDERED_USES \
305 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307 /* If there are at most this number of ivs in the set, try removing unnecessary
308 ivs from the set always. */
310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313 /* The list of trees for that the decl_rtl field must be reset is stored
314 here. */
316 static VEC(tree,heap) *decl_rtl_to_reset;
318 /* Number of uses recorded in DATA. */
320 static inline unsigned
321 n_iv_uses (struct ivopts_data *data)
323 return VEC_length (iv_use_p, data->iv_uses);
326 /* Ith use recorded in DATA. */
328 static inline struct iv_use *
329 iv_use (struct ivopts_data *data, unsigned i)
331 return VEC_index (iv_use_p, data->iv_uses, i);
334 /* Number of candidates recorded in DATA. */
336 static inline unsigned
337 n_iv_cands (struct ivopts_data *data)
339 return VEC_length (iv_cand_p, data->iv_candidates);
342 /* Ith candidate recorded in DATA. */
344 static inline struct iv_cand *
345 iv_cand (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_cand_p, data->iv_candidates, i);
350 /* The single loop exit if it dominates the latch, NULL otherwise. */
352 edge
353 single_dom_exit (struct loop *loop)
355 edge exit = single_exit (loop);
357 if (!exit)
358 return NULL;
360 if (!just_once_each_iteration_p (loop, exit->src))
361 return NULL;
363 return exit;
366 /* Dumps information about the induction variable IV to FILE. */
368 extern void dump_iv (FILE *, struct iv *);
369 void
370 dump_iv (FILE *file, struct iv *iv)
372 if (iv->ssa_name)
374 fprintf (file, "ssa name ");
375 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
376 fprintf (file, "\n");
379 fprintf (file, " type ");
380 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
381 fprintf (file, "\n");
383 if (iv->step)
385 fprintf (file, " base ");
386 print_generic_expr (file, iv->base, TDF_SLIM);
387 fprintf (file, "\n");
389 fprintf (file, " step ");
390 print_generic_expr (file, iv->step, TDF_SLIM);
391 fprintf (file, "\n");
393 else
395 fprintf (file, " invariant ");
396 print_generic_expr (file, iv->base, TDF_SLIM);
397 fprintf (file, "\n");
400 if (iv->base_object)
402 fprintf (file, " base object ");
403 print_generic_expr (file, iv->base_object, TDF_SLIM);
404 fprintf (file, "\n");
407 if (iv->biv_p)
408 fprintf (file, " is a biv\n");
411 /* Dumps information about the USE to FILE. */
413 extern void dump_use (FILE *, struct iv_use *);
414 void
415 dump_use (FILE *file, struct iv_use *use)
417 fprintf (file, "use %d\n", use->id);
419 switch (use->type)
421 case USE_NONLINEAR_EXPR:
422 fprintf (file, " generic\n");
423 break;
425 case USE_ADDRESS:
426 fprintf (file, " address\n");
427 break;
429 case USE_COMPARE:
430 fprintf (file, " compare\n");
431 break;
433 default:
434 gcc_unreachable ();
437 fprintf (file, " in statement ");
438 print_generic_expr (file, use->stmt, TDF_SLIM);
439 fprintf (file, "\n");
441 fprintf (file, " at position ");
442 if (use->op_p)
443 print_generic_expr (file, *use->op_p, TDF_SLIM);
444 fprintf (file, "\n");
446 dump_iv (file, use->iv);
448 if (use->related_cands)
450 fprintf (file, " related candidates ");
451 dump_bitmap (file, use->related_cands);
455 /* Dumps information about the uses to FILE. */
457 extern void dump_uses (FILE *, struct ivopts_data *);
458 void
459 dump_uses (FILE *file, struct ivopts_data *data)
461 unsigned i;
462 struct iv_use *use;
464 for (i = 0; i < n_iv_uses (data); i++)
466 use = iv_use (data, i);
468 dump_use (file, use);
469 fprintf (file, "\n");
473 /* Dumps information about induction variable candidate CAND to FILE. */
475 extern void dump_cand (FILE *, struct iv_cand *);
476 void
477 dump_cand (FILE *file, struct iv_cand *cand)
479 struct iv *iv = cand->iv;
481 fprintf (file, "candidate %d%s\n",
482 cand->id, cand->important ? " (important)" : "");
484 if (cand->depends_on)
486 fprintf (file, " depends on ");
487 dump_bitmap (file, cand->depends_on);
490 if (!iv)
492 fprintf (file, " final value replacement\n");
493 return;
496 switch (cand->pos)
498 case IP_NORMAL:
499 fprintf (file, " incremented before exit test\n");
500 break;
502 case IP_END:
503 fprintf (file, " incremented at end\n");
504 break;
506 case IP_ORIGINAL:
507 fprintf (file, " original biv\n");
508 break;
511 dump_iv (file, iv);
514 /* Returns the info for ssa version VER. */
516 static inline struct version_info *
517 ver_info (struct ivopts_data *data, unsigned ver)
519 return data->version_info + ver;
522 /* Returns the info for ssa name NAME. */
524 static inline struct version_info *
525 name_info (struct ivopts_data *data, tree name)
527 return ver_info (data, SSA_NAME_VERSION (name));
530 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
531 emitted in LOOP. */
533 static bool
534 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
536 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
538 gcc_assert (bb);
540 if (sbb == loop->latch)
541 return true;
543 if (sbb != bb)
544 return false;
546 return stmt == last_stmt (bb);
549 /* Returns true if STMT if after the place where the original induction
550 variable CAND is incremented. */
552 static bool
553 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
555 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
556 basic_block stmt_bb = bb_for_stmt (stmt);
557 block_stmt_iterator bsi;
559 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
560 return false;
562 if (stmt_bb != cand_bb)
563 return true;
565 /* Scan the block from the end, since the original ivs are usually
566 incremented at the end of the loop body. */
567 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
569 if (bsi_stmt (bsi) == cand->incremented_at)
570 return false;
571 if (bsi_stmt (bsi) == stmt)
572 return true;
576 /* Returns true if STMT if after the place where the induction variable
577 CAND is incremented in LOOP. */
579 static bool
580 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
582 switch (cand->pos)
584 case IP_END:
585 return false;
587 case IP_NORMAL:
588 return stmt_after_ip_normal_pos (loop, stmt);
590 case IP_ORIGINAL:
591 return stmt_after_ip_original_pos (cand, stmt);
593 default:
594 gcc_unreachable ();
598 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
600 static bool
601 abnormal_ssa_name_p (tree exp)
603 if (!exp)
604 return false;
606 if (TREE_CODE (exp) != SSA_NAME)
607 return false;
609 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
612 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
613 abnormal phi node. Callback for for_each_index. */
615 static bool
616 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
617 void *data ATTRIBUTE_UNUSED)
619 if (TREE_CODE (base) == ARRAY_REF)
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
622 return false;
623 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
624 return false;
627 return !abnormal_ssa_name_p (*index);
630 /* Returns true if EXPR contains a ssa name that occurs in an
631 abnormal phi node. */
633 bool
634 contains_abnormal_ssa_name_p (tree expr)
636 enum tree_code code;
637 enum tree_code_class class;
639 if (!expr)
640 return false;
642 code = TREE_CODE (expr);
643 class = TREE_CODE_CLASS (code);
645 if (code == SSA_NAME)
646 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
648 if (code == INTEGER_CST
649 || is_gimple_min_invariant (expr))
650 return false;
652 if (code == ADDR_EXPR)
653 return !for_each_index (&TREE_OPERAND (expr, 0),
654 idx_contains_abnormal_ssa_name_p,
655 NULL);
657 switch (class)
659 case tcc_binary:
660 case tcc_comparison:
661 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
662 return true;
664 /* Fallthru. */
665 case tcc_unary:
666 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
667 return true;
669 break;
671 default:
672 gcc_unreachable ();
675 return false;
678 /* Returns tree describing number of iterations determined from
679 EXIT of DATA->current_loop, or NULL if something goes wrong. */
681 static tree
682 niter_for_exit (struct ivopts_data *data, edge exit)
684 struct tree_niter_desc desc;
685 tree niter;
686 void **slot;
688 if (!data->niters)
690 data->niters = pointer_map_create ();
691 slot = NULL;
693 else
694 slot = pointer_map_contains (data->niters, exit);
696 if (!slot)
698 /* Try to determine number of iterations. We must know it
699 unconditionally (i.e., without possibility of # of iterations
700 being zero). Also, we cannot safely work with ssa names that
701 appear in phi nodes on abnormal edges, so that we do not create
702 overlapping life ranges for them (PR 27283). */
703 if (number_of_iterations_exit (data->current_loop,
704 exit, &desc, true)
705 && integer_zerop (desc.may_be_zero)
706 && !contains_abnormal_ssa_name_p (desc.niter))
707 niter = desc.niter;
708 else
709 niter = NULL_TREE;
711 *pointer_map_insert (data->niters, exit) = niter;
713 else
714 niter = *slot;
716 return niter;
719 /* Returns tree describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
721 goes wrong. */
723 static tree
724 niter_for_single_dom_exit (struct ivopts_data *data)
726 edge exit = single_dom_exit (data->current_loop);
728 if (!exit)
729 return NULL;
731 return niter_for_exit (data, exit);
734 /* Initializes data structures used by the iv optimization pass, stored
735 in DATA. */
737 static void
738 tree_ssa_iv_optimize_init (struct ivopts_data *data)
740 data->version_info_size = 2 * num_ssa_names;
741 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
742 data->relevant = BITMAP_ALLOC (NULL);
743 data->important_candidates = BITMAP_ALLOC (NULL);
744 data->max_inv_id = 0;
745 data->niters = NULL;
746 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
747 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
748 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
751 /* Returns a memory object to that EXPR points. In case we are able to
752 determine that it does not point to any such object, NULL is returned. */
754 static tree
755 determine_base_object (tree expr)
757 enum tree_code code = TREE_CODE (expr);
758 tree base, obj, op0, op1;
760 /* If this is a pointer casted to any type, we need to determine
761 the base object for the pointer; so handle conversions before
762 throwing away non-pointer expressions. */
763 if (TREE_CODE (expr) == NOP_EXPR
764 || TREE_CODE (expr) == CONVERT_EXPR)
765 return determine_base_object (TREE_OPERAND (expr, 0));
767 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
768 return NULL_TREE;
770 switch (code)
772 case INTEGER_CST:
773 return NULL_TREE;
775 case ADDR_EXPR:
776 obj = TREE_OPERAND (expr, 0);
777 base = get_base_address (obj);
779 if (!base)
780 return expr;
782 if (TREE_CODE (base) == INDIRECT_REF)
783 return determine_base_object (TREE_OPERAND (base, 0));
785 return fold_convert (ptr_type_node,
786 build_fold_addr_expr (base));
788 case PLUS_EXPR:
789 case MINUS_EXPR:
790 op0 = determine_base_object (TREE_OPERAND (expr, 0));
791 op1 = determine_base_object (TREE_OPERAND (expr, 1));
793 if (!op1)
794 return op0;
796 if (!op0)
797 return (code == PLUS_EXPR
798 ? op1
799 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
801 return fold_build2 (code, ptr_type_node, op0, op1);
803 default:
804 return fold_convert (ptr_type_node, expr);
808 /* Allocates an induction variable with given initial value BASE and step STEP
809 for loop LOOP. */
811 static struct iv *
812 alloc_iv (tree base, tree step)
814 struct iv *iv = XCNEW (struct iv);
815 gcc_assert (step != NULL_TREE);
817 iv->base = base;
818 iv->base_object = determine_base_object (base);
819 iv->step = step;
820 iv->biv_p = false;
821 iv->have_use_for = false;
822 iv->use_id = 0;
823 iv->ssa_name = NULL_TREE;
825 return iv;
828 /* Sets STEP and BASE for induction variable IV. */
830 static void
831 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
833 struct version_info *info = name_info (data, iv);
835 gcc_assert (!info->iv);
837 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
838 info->iv = alloc_iv (base, step);
839 info->iv->ssa_name = iv;
842 /* Finds induction variable declaration for VAR. */
844 static struct iv *
845 get_iv (struct ivopts_data *data, tree var)
847 basic_block bb;
848 tree type = TREE_TYPE (var);
850 if (!POINTER_TYPE_P (type)
851 && !INTEGRAL_TYPE_P (type))
852 return NULL;
854 if (!name_info (data, var)->iv)
856 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
858 if (!bb
859 || !flow_bb_inside_loop_p (data->current_loop, bb))
860 set_iv (data, var, var, build_int_cst (type, 0));
863 return name_info (data, var)->iv;
866 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
867 not define a simple affine biv with nonzero step. */
869 static tree
870 determine_biv_step (tree phi)
872 struct loop *loop = bb_for_stmt (phi)->loop_father;
873 tree name = PHI_RESULT (phi);
874 affine_iv iv;
876 if (!is_gimple_reg (name))
877 return NULL_TREE;
879 if (!simple_iv (loop, phi, name, &iv, true))
880 return NULL_TREE;
882 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
885 /* Finds basic ivs. */
887 static bool
888 find_bivs (struct ivopts_data *data)
890 tree phi, step, type, base;
891 bool found = false;
892 struct loop *loop = data->current_loop;
894 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
896 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
897 continue;
899 step = determine_biv_step (phi);
900 if (!step)
901 continue;
903 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
904 base = expand_simple_operations (base);
905 if (contains_abnormal_ssa_name_p (base)
906 || contains_abnormal_ssa_name_p (step))
907 continue;
909 type = TREE_TYPE (PHI_RESULT (phi));
910 base = fold_convert (type, base);
911 if (step)
912 step = fold_convert (type, step);
914 set_iv (data, PHI_RESULT (phi), base, step);
915 found = true;
918 return found;
921 /* Marks basic ivs. */
923 static void
924 mark_bivs (struct ivopts_data *data)
926 tree phi, var;
927 struct iv *iv, *incr_iv;
928 struct loop *loop = data->current_loop;
929 basic_block incr_bb;
931 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
933 iv = get_iv (data, PHI_RESULT (phi));
934 if (!iv)
935 continue;
937 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
938 incr_iv = get_iv (data, var);
939 if (!incr_iv)
940 continue;
942 /* If the increment is in the subloop, ignore it. */
943 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
944 if (incr_bb->loop_father != data->current_loop
945 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
946 continue;
948 iv->biv_p = true;
949 incr_iv->biv_p = true;
953 /* Checks whether STMT defines a linear induction variable and stores its
954 parameters to IV. */
956 static bool
957 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
959 tree lhs;
960 struct loop *loop = data->current_loop;
962 iv->base = NULL_TREE;
963 iv->step = NULL_TREE;
965 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
966 return false;
968 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
969 if (TREE_CODE (lhs) != SSA_NAME)
970 return false;
972 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
973 return false;
974 iv->base = expand_simple_operations (iv->base);
976 if (contains_abnormal_ssa_name_p (iv->base)
977 || contains_abnormal_ssa_name_p (iv->step))
978 return false;
980 return true;
983 /* Finds general ivs in statement STMT. */
985 static void
986 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
988 affine_iv iv;
990 if (!find_givs_in_stmt_scev (data, stmt, &iv))
991 return;
993 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
996 /* Finds general ivs in basic block BB. */
998 static void
999 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1001 block_stmt_iterator bsi;
1003 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1004 find_givs_in_stmt (data, bsi_stmt (bsi));
1007 /* Finds general ivs. */
1009 static void
1010 find_givs (struct ivopts_data *data)
1012 struct loop *loop = data->current_loop;
1013 basic_block *body = get_loop_body_in_dom_order (loop);
1014 unsigned i;
1016 for (i = 0; i < loop->num_nodes; i++)
1017 find_givs_in_bb (data, body[i]);
1018 free (body);
1021 /* For each ssa name defined in LOOP determines whether it is an induction
1022 variable and if so, its initial value and step. */
1024 static bool
1025 find_induction_variables (struct ivopts_data *data)
1027 unsigned i;
1028 bitmap_iterator bi;
1030 if (!find_bivs (data))
1031 return false;
1033 find_givs (data);
1034 mark_bivs (data);
1036 if (dump_file && (dump_flags & TDF_DETAILS))
1038 tree niter = niter_for_single_dom_exit (data);
1040 if (niter)
1042 fprintf (dump_file, " number of iterations ");
1043 print_generic_expr (dump_file, niter, TDF_SLIM);
1044 fprintf (dump_file, "\n\n");
1047 fprintf (dump_file, "Induction variables:\n\n");
1049 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1051 if (ver_info (data, i)->iv)
1052 dump_iv (dump_file, ver_info (data, i)->iv);
1056 return true;
1059 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1061 static struct iv_use *
1062 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1063 tree stmt, enum use_type use_type)
1065 struct iv_use *use = XCNEW (struct iv_use);
1067 use->id = n_iv_uses (data);
1068 use->type = use_type;
1069 use->iv = iv;
1070 use->stmt = stmt;
1071 use->op_p = use_p;
1072 use->related_cands = BITMAP_ALLOC (NULL);
1074 /* To avoid showing ssa name in the dumps, if it was not reset by the
1075 caller. */
1076 iv->ssa_name = NULL_TREE;
1078 if (dump_file && (dump_flags & TDF_DETAILS))
1079 dump_use (dump_file, use);
1081 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1083 return use;
1086 /* Checks whether OP is a loop-level invariant and if so, records it.
1087 NONLINEAR_USE is true if the invariant is used in a way we do not
1088 handle specially. */
1090 static void
1091 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1093 basic_block bb;
1094 struct version_info *info;
1096 if (TREE_CODE (op) != SSA_NAME
1097 || !is_gimple_reg (op))
1098 return;
1100 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1101 if (bb
1102 && flow_bb_inside_loop_p (data->current_loop, bb))
1103 return;
1105 info = name_info (data, op);
1106 info->name = op;
1107 info->has_nonlin_use |= nonlinear_use;
1108 if (!info->inv_id)
1109 info->inv_id = ++data->max_inv_id;
1110 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1113 /* Checks whether the use OP is interesting and if so, records it. */
1115 static struct iv_use *
1116 find_interesting_uses_op (struct ivopts_data *data, tree op)
1118 struct iv *iv;
1119 struct iv *civ;
1120 tree stmt;
1121 struct iv_use *use;
1123 if (TREE_CODE (op) != SSA_NAME)
1124 return NULL;
1126 iv = get_iv (data, op);
1127 if (!iv)
1128 return NULL;
1130 if (iv->have_use_for)
1132 use = iv_use (data, iv->use_id);
1134 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1135 return use;
1138 if (integer_zerop (iv->step))
1140 record_invariant (data, op, true);
1141 return NULL;
1143 iv->have_use_for = true;
1145 civ = XNEW (struct iv);
1146 *civ = *iv;
1148 stmt = SSA_NAME_DEF_STMT (op);
1149 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1150 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1152 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1153 iv->use_id = use->id;
1155 return use;
1158 /* Given a condition *COND_P, checks whether it is a compare of an induction
1159 variable and an invariant. If this is the case, CONTROL_VAR is set
1160 to location of the iv, BOUND to the location of the invariant,
1161 IV_VAR and IV_BOUND are set to the corresponding induction variable
1162 descriptions, and true is returned. If this is not the case,
1163 CONTROL_VAR and BOUND are set to the arguments of the condition and
1164 false is returned. */
1166 static bool
1167 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1168 tree **control_var, tree **bound,
1169 struct iv **iv_var, struct iv **iv_bound)
1171 /* The nodes returned when COND has just one operand. Note that you should
1172 not modify anything in BOUND or IV_BOUND because of this. */
1173 static struct iv const_iv;
1174 static tree zero;
1175 tree cond = *cond_p;
1176 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1177 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1178 bool ret = false;
1180 zero = integer_zero_node;
1181 const_iv.step = integer_zero_node;
1183 if (TREE_CODE (cond) == SSA_NAME)
1185 op0 = cond_p;
1186 iv0 = get_iv (data, cond);
1187 ret = (iv0 && !integer_zerop (iv0->step));
1188 goto end;
1191 if (!COMPARISON_CLASS_P (cond))
1193 op0 = cond_p;
1194 goto end;
1197 op0 = &TREE_OPERAND (cond, 0);
1198 op1 = &TREE_OPERAND (cond, 1);
1199 if (TREE_CODE (*op0) == SSA_NAME)
1200 iv0 = get_iv (data, *op0);
1201 if (TREE_CODE (*op1) == SSA_NAME)
1202 iv1 = get_iv (data, *op1);
1204 /* Exactly one of the compared values must be an iv, and the other one must
1205 be an invariant. */
1206 if (!iv0 || !iv1)
1207 goto end;
1209 if (integer_zerop (iv0->step))
1211 /* Control variable may be on the other side. */
1212 tmp_op = op0; op0 = op1; op1 = tmp_op;
1213 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1215 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1217 end:
1218 if (control_var)
1219 *control_var = op0;;
1220 if (iv_var)
1221 *iv_var = iv0;;
1222 if (bound)
1223 *bound = op1;
1224 if (iv_bound)
1225 *iv_bound = iv1;
1227 return ret;
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231 and if so, records it. */
1233 static void
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1236 tree *var_p, *bound_p;
1237 struct iv *var_iv, *civ;
1239 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1241 find_interesting_uses_op (data, *var_p);
1242 find_interesting_uses_op (data, *bound_p);
1243 return;
1246 civ = XNEW (struct iv);
1247 *civ = *var_iv;
1248 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1251 /* Returns true if expression EXPR is obviously invariant in LOOP,
1252 i.e. if all its operands are defined outside of the LOOP. */
1254 bool
1255 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1257 basic_block def_bb;
1258 unsigned i, len;
1260 if (is_gimple_min_invariant (expr))
1261 return true;
1263 if (TREE_CODE (expr) == SSA_NAME)
1265 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1266 if (def_bb
1267 && flow_bb_inside_loop_p (loop, def_bb))
1268 return false;
1270 return true;
1273 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1274 return false;
1276 len = TREE_OPERAND_LENGTH (expr);
1277 for (i = 0; i < len; i++)
1278 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1279 return false;
1281 return true;
1284 /* Cumulates the steps of indices into DATA and replaces their values with the
1285 initial ones. Returns false when the value of the index cannot be determined.
1286 Callback for for_each_index. */
1288 struct ifs_ivopts_data
1290 struct ivopts_data *ivopts_data; /* Pass-wide data for the loop being processed.  */
1291 tree stmt; /* The statement in that the memory reference occurs.  */
1292 tree step; /* Accumulated step of all the indices, in sizetype bytes
	       (see idx_find_step, which folds into it in sizetype).  */
1295 static bool
1296 idx_find_step (tree base, tree *idx, void *data)
1298 struct ifs_ivopts_data *dta = data;
1299 struct iv *iv;
1300 tree step, iv_base, iv_step, lbound, off;
1301 struct loop *loop = dta->ivopts_data->current_loop;
1303 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1304 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1305 return false;
1307 /* If base is a component ref, require that the offset of the reference
1308 be invariant. */
1309 if (TREE_CODE (base) == COMPONENT_REF)
1311 off = component_ref_field_offset (base);
1312 return expr_invariant_in_loop_p (loop, off);
1315 /* If base is array, first check whether we will be able to move the
1316 reference out of the loop (in order to take its address in strength
1317 reduction). In order for this to work we need both lower bound
1318 and step to be loop invariants. */
1319 if (TREE_CODE (base) == ARRAY_REF)
1321 step = array_ref_element_size (base);
1322 lbound = array_ref_low_bound (base);
1324 if (!expr_invariant_in_loop_p (loop, step)
1325 || !expr_invariant_in_loop_p (loop, lbound))
1326 return false;
1329 if (TREE_CODE (*idx) != SSA_NAME)
1330 return true;
1332 iv = get_iv (dta->ivopts_data, *idx);
1333 if (!iv)
1334 return false;
1336 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1337 *&x[0], which is not folded and does not trigger the
1338 ARRAY_REF path below. */
1339 *idx = iv->base;
1341 if (integer_zerop (iv->step))
1342 return true;
1344 if (TREE_CODE (base) == ARRAY_REF)
1346 step = array_ref_element_size (base);
1348 /* We only handle addresses whose step is an integer constant. */
1349 if (TREE_CODE (step) != INTEGER_CST)
1350 return false;
1352 else
1353 /* The step for pointer arithmetics already is 1 byte. */
1354 step = build_int_cst (sizetype, 1);
1356 iv_base = iv->base;
1357 iv_step = iv->step;
1358 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1359 sizetype, &iv_base, &iv_step, dta->stmt,
1360 false))
1362 /* The index might wrap. */
1363 return false;
1366 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1367 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1369 return true;
1372 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1373 object is passed to it in DATA. */
1375 static bool
1376 idx_record_use (tree base, tree *idx,
1377 void *data)
1379 find_interesting_uses_op (data, *idx);
1380 if (TREE_CODE (base) == ARRAY_REF)
1382 find_interesting_uses_op (data, array_ref_element_size (base));
1383 find_interesting_uses_op (data, array_ref_low_bound (base));
1385 return true;
1388 /* Returns true if memory reference REF may be unaligned. */
1390 static bool
1391 may_be_unaligned_p (tree ref)
1393 tree base;
1394 tree base_type;
1395 HOST_WIDE_INT bitsize;
1396 HOST_WIDE_INT bitpos;
1397 tree toffset;
1398 enum machine_mode mode;
1399 int unsignedp, volatilep;
1400 unsigned base_align;
1402 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1403 thus they are not misaligned. */
1404 if (TREE_CODE (ref) == TARGET_MEM_REF)
1405 return false;
1407 /* The test below is basically copy of what expr.c:normal_inner_ref
1408 does to check whether the object must be loaded by parts when
1409 STRICT_ALIGNMENT is true. */
1410 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1411 &unsignedp, &volatilep, true);
1412 base_type = TREE_TYPE (base);
1413 base_align = TYPE_ALIGN (base_type);
1415 if (mode != BLKmode
1416 && (base_align < GET_MODE_ALIGNMENT (mode)
1417 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1418 || bitpos % BITS_PER_UNIT != 0))
1419 return true;
1421 return false;
1424 /* Return true if EXPR may be non-addressable. */
1426 static bool
1427 may_be_nonaddressable_p (tree expr)
1429 switch (TREE_CODE (expr))
1431 case COMPONENT_REF:
1432 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1433 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1435 case ARRAY_REF:
1436 case ARRAY_RANGE_REF:
1437 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1439 case VIEW_CONVERT_EXPR:
1440 /* This kind of view-conversions may wrap non-addressable objects
1441 and make them look addressable. After some processing the
1442 non-addressability may be uncovered again, causing ADDR_EXPRs
1443 of inappropriate objects to be built. */
1444 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1445 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1447 default:
1448 break;
1451 return false;
1454 /* Finds addresses in *OP_P inside STMT. */
1456 static void
1457 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1459 tree base = *op_p, step = build_int_cst (sizetype, 0);
1460 struct iv *civ;
1461 struct ifs_ivopts_data ifs_ivopts_data;
1463 /* Do not play with volatile memory references. A bit too conservative,
1464 perhaps, but safe. */
1465 if (stmt_ann (stmt)->has_volatile_ops)
1466 goto fail;
1468 /* Ignore bitfields for now. Not really something terribly complicated
1469 to handle. TODO. */
1470 if (TREE_CODE (base) == BIT_FIELD_REF)
1471 goto fail;
1473 if (may_be_nonaddressable_p (base))
1474 goto fail;
1476 if (STRICT_ALIGNMENT
1477 && may_be_unaligned_p (base))
1478 goto fail;
1480 base = unshare_expr (base);
/* TARGET_MEM_REFs already carry base/index/step fields; replace the ssa
   names in them by the iv descriptions and accumulate the total step.  */
1482 if (TREE_CODE (base) == TARGET_MEM_REF)
1484 tree type = build_pointer_type (TREE_TYPE (base));
1485 tree astep;
1487 if (TMR_BASE (base)
1488 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1490 civ = get_iv (data, TMR_BASE (base));
1491 if (!civ)
1492 goto fail;
1494 TMR_BASE (base) = civ->base;
1495 step = civ->step;
1497 if (TMR_INDEX (base)
1498 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1500 civ = get_iv (data, TMR_INDEX (base));
1501 if (!civ)
1502 goto fail;
1504 TMR_INDEX (base) = civ->base;
1505 astep = civ->step;
1507 if (astep)
1509 if (TMR_STEP (base))
1510 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1512 step = fold_build2 (PLUS_EXPR, type, step, astep);
/* An address that does not change inside the loop is not an address use.  */
1516 if (integer_zerop (step))
1517 goto fail;
1518 base = tree_mem_ref_addr (type, base);
1520 else
/* Generic reference: walk its indices, replacing them by their initial
   values and summing their steps via idx_find_step.  */
1522 ifs_ivopts_data.ivopts_data = data;
1523 ifs_ivopts_data.stmt = stmt;
1524 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1525 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1526 || integer_zerop (ifs_ivopts_data.step))
1527 goto fail;
1528 step = ifs_ivopts_data.step;
1530 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1531 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1533 base = build_fold_addr_expr (base);
1535 /* Substituting bases of IVs into the base expression might
1536 have caused folding opportunities. */
1537 if (TREE_CODE (base) == ADDR_EXPR)
1539 tree *ref = &TREE_OPERAND (base, 0);
1540 while (handled_component_p (*ref))
1541 ref = &TREE_OPERAND (*ref, 0);
1542 if (TREE_CODE (*ref) == INDIRECT_REF)
1543 *ref = fold_indirect_ref (*ref);
/* Record the whole address as a USE_ADDRESS use of the iv BASE + i * STEP.  */
1547 civ = alloc_iv (base, step);
1548 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1549 return;
1551 fail:
/* Could not express the address as an iv; still record the uses of the
   individual indices.  */
1552 for_each_index (op_p, idx_record_use, data);
1555 /* Finds and records invariants used in STMT. */
1557 static void
1558 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1560 ssa_op_iter iter;
1561 use_operand_p use_p;
1562 tree op;
1564 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1566 op = USE_FROM_PTR (use_p);
1567 record_invariant (data, op, false);
1571 /* Finds interesting uses of induction variables in the statement STMT. */
1573 static void
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1576 struct iv *iv;
1577 tree op, lhs, rhs;
1578 ssa_op_iter iter;
1579 use_operand_p use_p;
1581 find_invariants_stmt (data, stmt);
/* Conditions get special USE_COMPARE handling.  */
1583 if (TREE_CODE (stmt) == COND_EXPR)
1585 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1586 return;
1589 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1591 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1592 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1594 if (TREE_CODE (lhs) == SSA_NAME)
1596 /* If the statement defines an induction variable, the uses are not
1597 interesting by themselves. */
1599 iv = get_iv (data, lhs);
1601 if (iv && !integer_zerop (iv->step))
1602 return;
/* Dispatch on the kind of the right-hand side: comparisons become
   compare uses, memory references become address uses.  */
1605 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1607 case tcc_comparison:
1608 find_interesting_uses_cond (data, stmt,
1609 &GIMPLE_STMT_OPERAND (stmt, 1));
1610 return;
1612 case tcc_reference:
1613 find_interesting_uses_address (data, stmt,
1614 &GIMPLE_STMT_OPERAND (stmt, 1));
1615 if (REFERENCE_CLASS_P (lhs))
1616 find_interesting_uses_address (data, stmt,
1617 &GIMPLE_STMT_OPERAND (stmt, 0));
1618 return;
1620 default: ;
/* A store of a simple value: the destination is an address use.  */
1623 if (REFERENCE_CLASS_P (lhs)
1624 && is_gimple_val (rhs))
1626 find_interesting_uses_address (data, stmt,
1627 &GIMPLE_STMT_OPERAND (stmt, 0));
1628 find_interesting_uses_op (data, rhs);
1629 return;
1632 /* TODO -- we should also handle address uses of type
1634 memory = call (whatever);
1638 call (memory). */
/* Loop-header phis defining a nontrivial iv need no further handling;
   their arguments are not interesting by themselves.  */
1641 if (TREE_CODE (stmt) == PHI_NODE
1642 && bb_for_stmt (stmt) == data->current_loop->header)
1644 lhs = PHI_RESULT (stmt);
1645 iv = get_iv (data, lhs);
1647 if (iv && !integer_zerop (iv->step))
1648 return;
/* Fallback: record every ssa-name operand that is a known iv as a
   nonlinear use.  */
1651 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1653 op = USE_FROM_PTR (use_p);
1655 if (TREE_CODE (op) != SSA_NAME)
1656 continue;
1658 iv = get_iv (data, op);
1659 if (!iv)
1660 continue;
1662 find_interesting_uses_op (data, op);
1666 /* Finds interesting uses of induction variables outside of loops
1667 on loop exit edge EXIT. */
1669 static void
1670 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1672 tree phi, def;
1674 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1676 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1677 if (is_gimple_reg (def))
1678 find_interesting_uses_op (data, def);
1682 /* Finds uses of the induction variables that are interesting. */
1684 static void
1685 find_interesting_uses (struct ivopts_data *data)
1687 basic_block bb;
1688 block_stmt_iterator bsi;
1689 tree phi;
1690 basic_block *body = get_loop_body (data->current_loop);
1691 unsigned i;
1692 struct version_info *info;
1693 edge e;
1695 if (dump_file && (dump_flags & TDF_DETAILS))
1696 fprintf (dump_file, "Uses:\n\n");
/* Walk every block of the loop body, visiting exit edges, phis and
   statements.  */
1698 for (i = 0; i < data->current_loop->num_nodes; i++)
1700 edge_iterator ei;
1701 bb = body[i];
/* Edges leaving the loop carry the uses live after the loop.  */
1703 FOR_EACH_EDGE (e, ei, bb->succs)
1704 if (e->dest != EXIT_BLOCK_PTR
1705 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1706 find_interesting_uses_outside (data, e);
1708 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1709 find_interesting_uses_stmt (data, phi);
1710 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1711 find_interesting_uses_stmt (data, bsi_stmt (bsi));
/* Optionally dump the invariants discovered while recording uses.  */
1714 if (dump_file && (dump_flags & TDF_DETAILS))
1716 bitmap_iterator bi;
1718 fprintf (dump_file, "\n");
1720 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1722 info = ver_info (data, i);
1723 if (info->inv_id)
1725 fprintf (dump_file, " ");
1726 print_generic_expr (dump_file, info->name, TDF_SLIM);
1727 fprintf (dump_file, " is invariant (%d)%s\n",
1728 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1732 fprintf (dump_file, "\n");
1735 free (body);
1738 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1739 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1740 we are at the top-level of the processed address. */
1742 static tree
1743 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1744 unsigned HOST_WIDE_INT *offset)
1746 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1747 enum tree_code code;
1748 tree type, orig_type = TREE_TYPE (expr);
1749 unsigned HOST_WIDE_INT off0, off1, st;
1750 tree orig_expr = expr;
1752 STRIP_NOPS (expr);
1754 type = TREE_TYPE (expr);
1755 code = TREE_CODE (expr);
1756 *offset = 0;
1758 switch (code)
/* A nonzero constant that fits in a HOST_WIDE_INT becomes pure offset.  */
1760 case INTEGER_CST:
1761 if (!cst_and_fits_in_hwi (expr)
1762 || integer_zerop (expr))
1763 return orig_expr;
1765 *offset = int_cst_value (expr);
1766 return build_int_cst (orig_type, 0);
/* Sums and differences: strip both operands and combine their offsets;
   rebuild the expression only if something changed.  */
1768 case PLUS_EXPR:
1769 case MINUS_EXPR:
1770 op0 = TREE_OPERAND (expr, 0);
1771 op1 = TREE_OPERAND (expr, 1);
1773 op0 = strip_offset_1 (op0, false, false, &off0);
1774 op1 = strip_offset_1 (op1, false, false, &off1);
1776 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1777 if (op0 == TREE_OPERAND (expr, 0)
1778 && op1 == TREE_OPERAND (expr, 1))
1779 return orig_expr;
1781 if (integer_zerop (op1))
1782 expr = op0;
1783 else if (integer_zerop (op0))
1785 if (code == PLUS_EXPR)
1786 expr = op1;
1787 else
1788 expr = fold_build1 (NEGATE_EXPR, type, op1);
1790 else
1791 expr = fold_build2 (code, type, op0, op1);
1793 return fold_convert (orig_type, expr);
/* Array index contributes index * element_size to the offset, but only
   when we are inside an address computation.  */
1795 case ARRAY_REF:
1796 if (!inside_addr)
1797 return orig_expr;
1799 step = array_ref_element_size (expr);
1800 if (!cst_and_fits_in_hwi (step))
1801 break;
1803 st = int_cst_value (step);
1804 op1 = TREE_OPERAND (expr, 1);
1805 op1 = strip_offset_1 (op1, false, false, &off1);
1806 *offset = off1 * st;
1808 if (top_compref
1809 && integer_zerop (op1))
1811 /* Strip the component reference completely. */
1812 op0 = TREE_OPERAND (expr, 0);
1813 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1814 *offset += off0;
1815 return op0;
1817 break;
1819 case COMPONENT_REF:
1820 if (!inside_addr)
1821 return orig_expr;
1823 tmp = component_ref_field_offset (expr);
1824 if (top_compref
1825 && cst_and_fits_in_hwi (tmp))
1827 /* Strip the component reference completely. */
1828 op0 = TREE_OPERAND (expr, 0);
1829 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1830 *offset = off0 + int_cst_value (tmp);
1831 return op0;
1833 break;
/* Taking an address starts an inside-address context with a fresh
   top-level component reference.  */
1835 case ADDR_EXPR:
1836 op0 = TREE_OPERAND (expr, 0);
1837 op0 = strip_offset_1 (op0, true, true, &off0);
1838 *offset += off0;
1840 if (op0 == TREE_OPERAND (expr, 0))
1841 return orig_expr;
1843 expr = build_fold_addr_expr (op0);
1844 return fold_convert (orig_type, expr);
/* Dereferencing leaves the address context.  */
1846 case INDIRECT_REF:
1847 inside_addr = false;
1848 break;
1850 default:
1851 return orig_expr;
1854 /* Default handling of expressions for that we want to recurse into
1855 the first operand. */
1856 op0 = TREE_OPERAND (expr, 0);
1857 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1858 *offset += off0;
1860 if (op0 == TREE_OPERAND (expr, 0)
1861 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1862 return orig_expr;
1864 expr = copy_node (expr);
1865 TREE_OPERAND (expr, 0) = op0;
1866 if (op1)
1867 TREE_OPERAND (expr, 1) = op1;
1869 /* Inside address, we might strip the top level component references,
1870 thus changing type of the expression. Handling of ADDR_EXPR
1871 will fix that. */
1872 expr = fold_convert (orig_type, expr);
1874 return expr;
1877 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1879 static tree
1880 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1882 return strip_offset_1 (expr, false, false, offset);
1885 /* Returns variant of TYPE that can be used as base for different uses.
1886 We return unsigned type with the same precision, which avoids problems
1887 with overflows. */
1889 static tree
1890 generic_type_for (tree type)
1892 if (POINTER_TYPE_P (type))
1893 return unsigned_type_for (type);
1895 if (TYPE_UNSIGNED (type))
1896 return type;
1898 return unsigned_type_for (type);
1901 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1902 the bitmap to that we should store it. */
1904 static struct ivopts_data *fd_ivopts_data;
1905 static tree
1906 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1908 bitmap *depends_on = data;
1909 struct version_info *info;
1911 if (TREE_CODE (*expr_p) != SSA_NAME)
1912 return NULL_TREE;
1913 info = name_info (fd_ivopts_data, *expr_p);
1915 if (!info->inv_id || info->has_nonlin_use)
1916 return NULL_TREE;
1918 if (!*depends_on)
1919 *depends_on = BITMAP_ALLOC (NULL);
1920 bitmap_set_bit (*depends_on, info->inv_id);
1922 return NULL_TREE;
1925 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1926 position to POS. If USE is not NULL, the candidate is set as related to
1927 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1928 replacement of the final value of the iv by a direct computation. */
1930 static struct iv_cand *
1931 add_candidate_1 (struct ivopts_data *data,
1932 tree base, tree step, bool important, enum iv_position pos,
1933 struct iv_use *use, tree incremented_at)
1935 unsigned i;
1936 struct iv_cand *cand = NULL;
1937 tree type, orig_type;
/* Normalize the candidate to an unsigned type of the same precision to
   avoid overflow issues (see generic_type_for).  */
1939 if (base)
1941 orig_type = TREE_TYPE (base);
1942 type = generic_type_for (orig_type);
1943 if (type != orig_type)
1945 base = fold_convert (type, base);
1946 step = fold_convert (type, step);
/* Look for an already existing equivalent candidate (same position,
   increment statement, and base/step) to avoid duplicates.  */
1950 for (i = 0; i < n_iv_cands (data); i++)
1952 cand = iv_cand (data, i);
1954 if (cand->pos != pos)
1955 continue;
1957 if (cand->incremented_at != incremented_at)
1958 continue;
1960 if (!cand->iv)
1962 if (!base && !step)
1963 break;
1965 continue;
1968 if (!base && !step)
1969 continue;
1971 if (operand_equal_p (base, cand->iv->base, 0)
1972 && operand_equal_p (step, cand->iv->step, 0))
1973 break;
/* Not found: create and register a new candidate.  */
1976 if (i == n_iv_cands (data))
1978 cand = XCNEW (struct iv_cand)
1979 cand->id = i;
1981 if (!base && !step)
1982 cand->iv = NULL;
1983 else
1984 cand->iv = alloc_iv (base, step);
1986 cand->pos = pos;
1987 if (pos != IP_ORIGINAL && cand->iv)
1989 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1990 cand->var_after = cand->var_before;
1992 cand->important = important;
1993 cand->incremented_at = incremented_at;
1994 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
/* A non-constant step makes the candidate depend on the invariants
   occurring in it.  */
1996 if (step
1997 && TREE_CODE (step) != INTEGER_CST)
1999 fd_ivopts_data = data;
2000 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2003 if (dump_file && (dump_flags & TDF_DETAILS))
2004 dump_cand (dump_file, cand);
/* An existing candidate may be promoted to important.  */
2007 if (important && !cand->important)
2009 cand->important = true;
2010 if (dump_file && (dump_flags & TDF_DETAILS))
2011 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2014 if (use)
2016 bitmap_set_bit (use->related_cands, i);
2017 if (dump_file && (dump_flags & TDF_DETAILS))
2018 fprintf (dump_file, "Candidate %d is related to use %d\n",
2019 cand->id, use->id);
2022 return cand;
2025 /* Returns true if incrementing the induction variable at the end of the LOOP
2026 is allowed.
2028 The purpose is to avoid splitting latch edge with a biv increment, thus
2029 creating a jump, possibly confusing other optimization passes and leaving
2030 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2031 is not available (so we do not have a better alternative), or if the latch
2032 edge is already nonempty. */
2034 static bool
2035 allow_ip_end_pos_p (struct loop *loop)
2037 if (!ip_normal_pos (loop))
2038 return true;
2040 if (!empty_block_p (ip_end_pos (loop)))
2041 return true;
2043 return false;
2046 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2047 position to POS. If USE is not NULL, the candidate is set as related to
2048 it. The candidate computation is scheduled on all available positions. */
2050 static void
2051 add_candidate (struct ivopts_data *data,
2052 tree base, tree step, bool important, struct iv_use *use)
2054 if (ip_normal_pos (data->current_loop))
2055 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2056 if (ip_end_pos (data->current_loop)
2057 && allow_ip_end_pos_p (data->current_loop))
2058 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2061 /* Add a standard "0 + 1 * iteration" iv candidate for a
2062 type with SIZE bits. */
2064 static void
2065 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2066 unsigned int size)
2068 tree type = lang_hooks.types.type_for_size (size, true);
2069 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2070 true, NULL);
2073 /* Adds standard iv candidates. */
2075 static void
2076 add_standard_iv_candidates (struct ivopts_data *data)
2078 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2080 /* The same for a double-integer type if it is still fast enough. */
2081 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2082 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2086 /* Adds candidates bases on the old induction variable IV. */
2088 static void
2089 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2091 tree phi, def;
2092 struct iv_cand *cand;
2094 add_candidate (data, iv->base, iv->step, true, NULL);
2096 /* The same, but with initial value zero. */
2097 add_candidate (data,
2098 build_int_cst (TREE_TYPE (iv->base), 0),
2099 iv->step, true, NULL);
2101 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2102 if (TREE_CODE (phi) == PHI_NODE)
2104 /* Additionally record the possibility of leaving the original iv
2105 untouched. */
2106 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2107 cand = add_candidate_1 (data,
2108 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2109 SSA_NAME_DEF_STMT (def));
2110 cand->var_before = iv->ssa_name;
2111 cand->var_after = def;
2115 /* Adds candidates based on the old induction variables. */
2117 static void
2118 add_old_ivs_candidates (struct ivopts_data *data)
2120 unsigned i;
2121 struct iv *iv;
2122 bitmap_iterator bi;
2124 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2126 iv = ver_info (data, i)->iv;
2127 if (iv && iv->biv_p && !integer_zerop (iv->step))
2128 add_old_iv_candidates (data, iv);
2132 /* Adds candidates based on the value of the induction variable IV and USE. */
2134 static void
2135 add_iv_value_candidates (struct ivopts_data *data,
2136 struct iv *iv, struct iv_use *use)
2138 unsigned HOST_WIDE_INT offset;
2139 tree base;
2141 add_candidate (data, iv->base, iv->step, false, use);
2143 /* The same, but with initial value zero. Make such variable important,
2144 since it is generic enough so that possibly many uses may be based
2145 on it. */
2146 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2147 iv->step, true, use);
2149 /* Third, try removing the constant offset. */
2150 base = strip_offset (iv->base, &offset);
2151 if (offset)
2152 add_candidate (data, base, iv->step, false, use);
2155 /* Adds candidates based on the uses. */
2157 static void
2158 add_derived_ivs_candidates (struct ivopts_data *data)
2160 unsigned i;
2162 for (i = 0; i < n_iv_uses (data); i++)
2164 struct iv_use *use = iv_use (data, i);
2166 if (!use)
2167 continue;
2169 switch (use->type)
2171 case USE_NONLINEAR_EXPR:
2172 case USE_COMPARE:
2173 case USE_ADDRESS:
2174 /* Just add the ivs based on the value of the iv used here. */
2175 add_iv_value_candidates (data, use->iv, use);
2176 break;
2178 default:
2179 gcc_unreachable ();
2184 /* Record important candidates and add them to related_cands bitmaps
2185 if needed. */
2187 static void
2188 record_important_candidates (struct ivopts_data *data)
2190 unsigned i;
2191 struct iv_use *use;
2193 for (i = 0; i < n_iv_cands (data); i++)
2195 struct iv_cand *cand = iv_cand (data, i);
2197 if (cand->important)
2198 bitmap_set_bit (data->important_candidates, i);
2201 data->consider_all_candidates = (n_iv_cands (data)
2202 <= CONSIDER_ALL_CANDIDATES_BOUND);
2204 if (data->consider_all_candidates)
2206 /* We will not need "related_cands" bitmaps in this case,
2207 so release them to decrease peak memory consumption. */
2208 for (i = 0; i < n_iv_uses (data); i++)
2210 use = iv_use (data, i);
2211 BITMAP_FREE (use->related_cands);
2214 else
2216 /* Add important candidates to the related_cands bitmaps. */
2217 for (i = 0; i < n_iv_uses (data); i++)
2218 bitmap_ior_into (iv_use (data, i)->related_cands,
2219 data->important_candidates);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Commonly used ivs (0 + 1 * i in the standard integer types).  */
  add_standard_iv_candidates (data);

  /* Bivs already present in the loop.  */
  add_old_ivs_candidates (data);

  /* Candidates derived from the recorded uses.  */
  add_derived_ivs_candidates (data);

  /* Finally mark the important candidates.  */
  record_important_candidates (data);
}
2241 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2242 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2243 we allocate a simple list to every use. */
2245 static void
2246 alloc_use_cost_map (struct ivopts_data *data)
2248 unsigned i, size, s, j;
2250 for (i = 0; i < n_iv_uses (data); i++)
2252 struct iv_use *use = iv_use (data, i);
2253 bitmap_iterator bi;
2255 if (data->consider_all_candidates)
2256 size = n_iv_cands (data);
2257 else
2259 s = 0;
2260 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2262 s++;
2265 /* Round up to the power of two, so that moduling by it is fast. */
2266 for (size = 1; size < s; size <<= 1)
2267 continue;
2270 use->n_map_members = size;
2271 use->cost_map = XCNEWVEC (struct cost_pair, size);
2275 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2276 on invariants DEPENDS_ON and that the value used in expressing it
2277 is VALUE.*/
2279 static void
2280 set_use_iv_cost (struct ivopts_data *data,
2281 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2282 bitmap depends_on, tree value)
2284 unsigned i, s;
2286 if (cost == INFTY)
2288 BITMAP_FREE (depends_on);
2289 return;
2292 if (data->consider_all_candidates)
2294 use->cost_map[cand->id].cand = cand;
2295 use->cost_map[cand->id].cost = cost;
2296 use->cost_map[cand->id].depends_on = depends_on;
2297 use->cost_map[cand->id].value = value;
2298 return;
2301 /* n_map_members is a power of two, so this computes modulo. */
2302 s = cand->id & (use->n_map_members - 1);
2303 for (i = s; i < use->n_map_members; i++)
2304 if (!use->cost_map[i].cand)
2305 goto found;
2306 for (i = 0; i < s; i++)
2307 if (!use->cost_map[i].cand)
2308 goto found;
2310 gcc_unreachable ();
2312 found:
2313 use->cost_map[i].cand = cand;
2314 use->cost_map[i].cost = cost;
2315 use->cost_map[i].depends_on = depends_on;
2316 use->cost_map[i].value = value;
2319 /* Gets cost of (USE, CANDIDATE) pair. */
2321 static struct cost_pair *
2322 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2323 struct iv_cand *cand)
2325 unsigned i, s;
2326 struct cost_pair *ret;
2328 if (!cand)
2329 return NULL;
2331 if (data->consider_all_candidates)
2333 ret = use->cost_map + cand->id;
2334 if (!ret->cand)
2335 return NULL;
2337 return ret;
2340 /* n_map_members is a power of two, so this computes modulo. */
2341 s = cand->id & (use->n_map_members - 1);
2342 for (i = s; i < use->n_map_members; i++)
2343 if (use->cost_map[i].cand == cand)
2344 return use->cost_map + i;
2346 for (i = 0; i < s; i++)
2347 if (use->cost_map[i].cand == cand)
2348 return use->cost_map + i;
2350 return NULL;
2353 /* Returns estimate on cost of computing SEQ. */
2355 static unsigned
2356 seq_cost (rtx seq)
2358 unsigned cost = 0;
2359 rtx set;
2361 for (; seq; seq = NEXT_INSN (seq))
2363 set = single_set (seq);
2364 if (set)
2365 cost += rtx_cost (set, SET);
2366 else
2367 cost++;
2370 return cost;
2373 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2374 static rtx
2375 produce_memory_decl_rtl (tree obj, int *regno)
2377 rtx x;
2379 gcc_assert (obj);
2380 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2382 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2383 x = gen_rtx_SYMBOL_REF (Pmode, name);
2384 SET_SYMBOL_REF_DECL (x, obj);
2385 x = gen_rtx_MEM (DECL_MODE (obj), x);
2386 targetm.encode_section_info (obj, x, true);
2388 else
2390 x = gen_raw_REG (Pmode, (*regno)++);
2391 x = gen_rtx_MEM (DECL_MODE (obj), x);
2394 return x;
2397 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2398 walk_tree. DATA contains the actual fake register number. */
2400 static tree
2401 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2403 tree obj = NULL_TREE;
2404 rtx x = NULL_RTX;
2405 int *regno = data;
2407 switch (TREE_CODE (*expr_p))
/* For an address, descend to the innermost referenced object and give
   it a memory DECL_RTL, since its address is taken.  */
2409 case ADDR_EXPR:
2410 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2411 handled_component_p (*expr_p);
2412 expr_p = &TREE_OPERAND (*expr_p, 0))
2413 continue;
2414 obj = *expr_p;
2415 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2416 x = produce_memory_decl_rtl (obj, regno);
2417 break;
/* *ws = 0 stops walk_tree from descending into this subtree.  */
2419 case SSA_NAME:
2420 *ws = 0;
2421 obj = SSA_NAME_VAR (*expr_p);
2422 if (!DECL_RTL_SET_P (obj))
2423 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2424 break;
2426 case VAR_DECL:
2427 case PARM_DECL:
2428 case RESULT_DECL:
2429 *ws = 0;
2430 obj = *expr_p;
2432 if (DECL_RTL_SET_P (obj))
2433 break;
/* BLKmode objects must live in memory; others get a fake register.  */
2435 if (DECL_MODE (obj) == BLKmode)
2436 x = produce_memory_decl_rtl (obj, regno);
2437 else
2438 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2440 break;
2442 default:
2443 break;
/* Remember the decl so that the fake DECL_RTL can be reset later.  */
2446 if (x)
2448 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2449 SET_DECL_RTL (obj, x);
2452 return NULL_TREE;
2455 /* Determines cost of the computation of EXPR. */
2457 static unsigned
2458 computation_cost (tree expr)
2460 rtx seq, rslt;
2461 tree type = TREE_TYPE (expr);
2462 unsigned cost;
2463 /* Avoid using hard regs in ways which may be unsupported. */
2464 int regno = LAST_VIRTUAL_REGISTER + 1;
2466 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2467 start_sequence ();
2468 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2469 seq = get_insns ();
2470 end_sequence ();
2472 cost = seq_cost (seq);
2473 if (MEM_P (rslt))
2474 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2476 return cost;
2479 /* Returns variable containing the value of candidate CAND at statement AT. */
2481 static tree
2482 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2484 if (stmt_after_increment (loop, cand, stmt))
2485 return cand->var_after;
2486 else
2487 return cand->var_before;
2490 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2491 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2494 tree_int_cst_sign_bit (tree t)
2496 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2497 unsigned HOST_WIDE_INT w;
2499 if (bitno < HOST_BITS_PER_WIDE_INT)
2500 w = TREE_INT_CST_LOW (t);
2501 else
2503 w = TREE_INT_CST_HIGH (t);
2504 bitno -= HOST_BITS_PER_WIDE_INT;
2507 return (w >> bitno) & 1;
2510 /* If we can prove that TOP = cst * BOT for some constant cst,
2511 store cst to MUL and return true. Otherwise return false.
2512 The returned value is always sign-extended, regardless of the
2513 signedness of TOP and BOT. */
2515 static bool
2516 constant_multiple_of (tree top, tree bot, double_int *mul)
2518 tree mby;
2519 enum tree_code code;
2520 double_int res, p0, p1;
/* All arithmetic below is performed modulo the precision of TOP's type,
   with explicit sign-extension after each operation.  */
2521 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2523 STRIP_NOPS (top);
2524 STRIP_NOPS (bot);
2526 if (operand_equal_p (top, bot, 0))
2528 *mul = double_int_one;
2529 return true;
2532 code = TREE_CODE (top);
2533 switch (code)
/* (X * C) is C times whatever multiple X is of BOT.  */
2535 case MULT_EXPR:
2536 mby = TREE_OPERAND (top, 1);
2537 if (TREE_CODE (mby) != INTEGER_CST)
2538 return false;
2540 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2541 return false;
2543 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2544 precision);
2545 return true;
/* A sum (difference) of multiples is a multiple.  */
2547 case PLUS_EXPR:
2548 case MINUS_EXPR:
2549 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2550 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2551 return false;
2553 if (code == MINUS_EXPR)
2554 p1 = double_int_neg (p1);
2555 *mul = double_int_sext (double_int_add (p0, p1), precision);
2556 return true;
/* Two constants: succeed exactly when BOT divides TOP (zero remainder
   of the floor division).  */
2558 case INTEGER_CST:
2559 if (TREE_CODE (bot) != INTEGER_CST)
2560 return false;
2562 p0 = double_int_sext (tree_to_double_int (top), precision);
2563 p1 = double_int_sext (tree_to_double_int (bot), precision);
2564 if (double_int_zero_p (p1))
2565 return false;
2566 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2567 precision);
2568 return double_int_zero_p (res);
2570 default:
2571 return false;
2575 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2576 same precision that is at least as wide as the precision of TYPE, stores
2577 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2578 type of A and B. */
2580 static tree
2581 determine_common_wider_type (tree *a, tree *b)
2583 tree wider_type = NULL;
2584 tree suba, subb;
2585 tree atype = TREE_TYPE (*a);
/* A must be a conversion whose inner type is at least as wide as ATYPE;
   otherwise give up and keep the original type.  */
2587 if ((TREE_CODE (*a) == NOP_EXPR
2588 || TREE_CODE (*a) == CONVERT_EXPR))
2590 suba = TREE_OPERAND (*a, 0);
2591 wider_type = TREE_TYPE (suba);
2592 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2593 return atype;
2595 else
2596 return atype;
/* B must likewise be a conversion, from a type with exactly the same
   precision as the type A converts from.  */
2598 if ((TREE_CODE (*b) == NOP_EXPR
2599 || TREE_CODE (*b) == CONVERT_EXPR))
2601 subb = TREE_OPERAND (*b, 0);
2602 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2603 return atype;
2605 else
2606 return atype;
/* Success: strip both conversions and report the common wider type.  */
2608 *a = suba;
2609 *b = subb;
2610 return wider_type;
2613 /* Determines the expression by that USE is expressed from induction variable
2614 CAND at statement AT in LOOP. The expression is stored in a decomposed
2615 form into AFF. Returns false if USE cannot be expressed using CAND. */
2617 static bool
2618 get_computation_aff (struct loop *loop,
2619 struct iv_use *use, struct iv_cand *cand, tree at,
2620 struct affine_tree_combination *aff)
2622 tree ubase = use->iv->base;
2623 tree ustep = use->iv->step;
2624 tree cbase = cand->iv->base;
2625 tree cstep = cand->iv->step, cstep_common;
2626 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2627 tree common_type, var;
2628 tree uutype;
2629 aff_tree cbase_aff, var_aff;
2630 double_int rat;
2632 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2634 /* We do not have a precision to express the values of use. */
2635 return false;
2638 var = var_at_stmt (loop, cand, at);
/* Arithmetic below is carried out in the unsigned variant of UTYPE,
   where overflow wraps.  */
2639 uutype = unsigned_type_for (utype);
2641 /* If the conversion is not noop, perform it. */
2642 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2644 cstep = fold_convert (uutype, cstep);
2645 cbase = fold_convert (uutype, cbase);
2646 var = fold_convert (uutype, var);
/* RAT is the constant ratio ustep / cstep; if the steps are not
   constant multiples, USE cannot be expressed from CAND.  */
2649 if (!constant_multiple_of (ustep, cstep, &rat))
2650 return false;
2652 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2653 type, we achieve better folding by computing their difference in this
2654 wider type, and cast the result to UUTYPE. We do not need to worry about
2655 overflows, as all the arithmetics will in the end be performed in UUTYPE
2656 anyway. */
2657 common_type = determine_common_wider_type (&ubase, &cbase);
2659 /* use = ubase - ratio * cbase + ratio * var. */
2660 tree_to_aff_combination (ubase, common_type, aff);
2661 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2662 tree_to_aff_combination (var, uutype, &var_aff);
2664 /* We need to shift the value if we are after the increment. */
2665 if (stmt_after_increment (loop, cand, at))
2667 aff_tree cstep_aff;
2669 if (common_type != uutype)
2670 cstep_common = fold_convert (common_type, cstep);
2671 else
2672 cstep_common = cstep;
2674 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2675 aff_combination_add (&cbase_aff, &cstep_aff);
/* Fold in the -ratio * cbase part, convert to UUTYPE, then add
   the ratio * var part.  */
2678 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2679 aff_combination_add (aff, &cbase_aff);
2680 if (common_type != uutype)
2681 aff_combination_convert (aff, uutype);
2683 aff_combination_scale (&var_aff, rat);
2684 aff_combination_add (aff, &var_aff);
2686 return true;
2689 /* Determines the expression by that USE is expressed from induction variable
2690 CAND at statement AT in LOOP. The computation is unshared. */
2692 static tree
2693 get_computation_at (struct loop *loop,
2694 struct iv_use *use, struct iv_cand *cand, tree at)
2696 aff_tree aff;
2697 tree type = TREE_TYPE (use->iv->base);
2699 if (!get_computation_aff (loop, use, cand, at, &aff))
2700 return NULL_TREE;
2701 unshare_aff_combination (&aff);
2702 return fold_convert (type, aff_combination_to_tree (&aff));
2705 /* Determines the expression by that USE is expressed from induction variable
2706 CAND in LOOP. The computation is unshared. */
2708 static tree
2709 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2711 return get_computation_at (loop, use, cand, use->stmt);
2714 /* Returns cost of addition in MODE. */
2716 static unsigned
2717 add_cost (enum machine_mode mode)
2719 static unsigned costs[NUM_MACHINE_MODES];
2720 rtx seq;
2721 unsigned cost;
2723 if (costs[mode])
2724 return costs[mode];
2726 start_sequence ();
2727 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2728 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2729 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2730 NULL_RTX);
2731 seq = get_insns ();
2732 end_sequence ();
2734 cost = seq_cost (seq);
2735 if (!cost)
2736 cost = 1;
2738 costs[mode] = cost;
2740 if (dump_file && (dump_flags & TDF_DETAILS))
2741 fprintf (dump_file, "Addition in %s costs %d\n",
2742 GET_MODE_NAME (mode), cost);
2743 return cost;
2746 /* Entry in a hashtable of already known costs for multiplication, keyed
   by the (CST, MODE) pair; see multiply_by_cost.  */
2747 struct mbc_entry
2749 HOST_WIDE_INT cst; /* The constant to multiply by. */
2750 enum machine_mode mode; /* In mode. */
2751 unsigned cost; /* The cost. */
2754 /* Counts hash value for the ENTRY. */
2756 static hashval_t
2757 mbc_entry_hash (const void *entry)
2759 const struct mbc_entry *e = entry;
2761 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2764 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2766 static int
2767 mbc_entry_eq (const void *entry1, const void *entry2)
2769 const struct mbc_entry *e1 = entry1;
2770 const struct mbc_entry *e2 = entry2;
2772 return (e1->mode == e2->mode
2773 && e1->cst == e2->cst);
2776 /* Returns cost of multiplication by constant CST in MODE. */
2778 unsigned
2779 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2781 static htab_t costs;
2782 struct mbc_entry **cached, act;
2783 rtx seq;
2784 unsigned cost;
/* Lazily create the (mode, constant) -> cost cache on first use.  */
2786 if (!costs)
2787 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2789 act.mode = mode;
2790 act.cst = cst;
2791 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2792 if (*cached)
2793 return (*cached)->cost;
/* Not cached yet: record the key, then measure the cost by expanding
   the multiplication to RTL and summing the insn costs.  */
2795 *cached = XNEW (struct mbc_entry)
2796 (*cached)->mode = mode;
2797 (*cached)->cst = cst;
2799 start_sequence ();
2800 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2801 gen_int_mode (cst, mode), NULL_RTX, 0);
2802 seq = get_insns ();
2803 end_sequence ();
2805 cost = seq_cost (seq);
2807 if (dump_file && (dump_flags & TDF_DETAILS))
2808 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2809 (int) cst, GET_MODE_NAME (mode), cost);
2811 (*cached)->cost = cost;
2813 return cost;
2816 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2817 validity for a memory reference accessing memory of mode MODE. */
2819 bool
2820 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2822 #define MAX_RATIO 128
2823 static sbitmap valid_mult[MAX_MACHINE_MODE];
/* On the first query for MODE, probe every ratio in [-MAX_RATIO,
   MAX_RATIO] against the target with memory_address_p and cache the
   answers in a bitmap (index shifted by MAX_RATIO).  */
2825 if (!valid_mult[mode])
2827 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2828 rtx addr;
2829 HOST_WIDE_INT i;
2831 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2832 sbitmap_zero (valid_mult[mode]);
2833 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2834 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2836 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2837 if (memory_address_p (mode, addr))
2838 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2841 if (dump_file && (dump_flags & TDF_DETAILS))
2843 fprintf (dump_file, " allowed multipliers:");
2844 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2845 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2846 fprintf (dump_file, " %d", (int) i);
2847 fprintf (dump_file, "\n");
2848 fprintf (dump_file, "\n");
/* Ratios outside the probed range are conservatively rejected.  */
2852 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2853 return false;
2855 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2858 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2859 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2860 variable is omitted. Compute the cost for a memory reference that accesses
2861 a memory location of mode MEM_MODE.
2863 TODO -- there must be some better way. This all is quite crude. */
2865 static unsigned
2866 get_address_cost (bool symbol_present, bool var_present,
2867 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2868 enum machine_mode mem_mode)
2870 static bool initialized[MAX_MACHINE_MODE];
2871 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2872 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2873 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2874 unsigned cost, acost;
2875 bool offset_p, ratio_p;
2876 HOST_WIDE_INT s_offset;
2877 unsigned HOST_WIDE_INT mask;
2878 unsigned bits;
/* Lazily probe the target's addressing capabilities for MEM_MODE and
   fill the static caches above.  */
2880 if (!initialized[mem_mode])
2882 HOST_WIDE_INT i;
2883 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2884 int old_cse_not_expected;
2885 unsigned sym_p, var_p, off_p, rat_p, add_c;
2886 rtx seq, addr, base;
2887 rtx reg0, reg1;
2889 initialized[mem_mode] = true;
2891 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2893 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
/* Find the largest positive (and, below, most negative) constant
   offset accepted in an address by doubling until memory_address_p
   rejects it.  */
2894 for (i = start; i <= 1 << 20; i <<= 1)
2896 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2897 if (!memory_address_p (mem_mode, addr))
2898 break;
2900 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2901 off[mem_mode] = max_offset[mem_mode];
2903 for (i = start; i <= 1 << 20; i <<= 1)
2905 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2906 if (!memory_address_p (mem_mode, addr))
2907 break;
2909 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2911 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, "get_address_cost:\n");
2914 fprintf (dump_file, " min offset %s %d\n",
2915 GET_MODE_NAME (mem_mode),
2916 (int) min_offset[mem_mode]);
2917 fprintf (dump_file, " max offset %s %d\n",
2918 GET_MODE_NAME (mem_mode),
2919 (int) max_offset[mem_mode]);
/* Pick the smallest multiplier > 1 allowed in an address, used as the
   representative ratio when costing RATIO * index shapes.  */
2922 rat[mem_mode] = 1;
2923 for (i = 2; i <= MAX_RATIO; i++)
2924 if (multiplier_allowed_in_address_p (i, mem_mode))
2926 rat[mem_mode] = i;
2927 break;
2930 /* Compute the cost of various addressing modes. */
2931 acost = 0;
2932 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2933 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
/* Enumerate all 16 combinations of the symbol/var/offset/ratio parts
   and record the cost of legitimizing each addressing shape.  */
2935 for (i = 0; i < 16; i++)
2937 sym_p = i & 1;
2938 var_p = (i >> 1) & 1;
2939 off_p = (i >> 2) & 1;
2940 rat_p = (i >> 3) & 1;
2942 addr = reg0;
2943 if (rat_p)
2944 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2945 gen_int_mode (rat[mem_mode], Pmode));
2947 if (var_p)
2948 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2950 if (sym_p)
2952 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2953 /* ??? We can run into trouble with some backends by presenting
2954 it with symbols which havn't been properly passed through
2955 targetm.encode_section_info. By setting the local bit, we
2956 enhance the probability of things working. */
2957 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2959 if (off_p)
2960 base = gen_rtx_fmt_e (CONST, Pmode,
2961 gen_rtx_fmt_ee (PLUS, Pmode,
2962 base,
2963 gen_int_mode (off[mem_mode],
2964 Pmode)));
2966 else if (off_p)
2967 base = gen_int_mode (off[mem_mode], Pmode);
2968 else
2969 base = NULL_RTX;
2971 if (base)
2972 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2974 start_sequence ();
2975 /* To avoid splitting addressing modes, pretend that no cse will
2976 follow. */
2977 old_cse_not_expected = cse_not_expected;
2978 cse_not_expected = true;
2979 addr = memory_address (mem_mode, addr);
2980 cse_not_expected = old_cse_not_expected;
2981 seq = get_insns ();
2982 end_sequence ();
2984 acost = seq_cost (seq);
2985 acost += address_cost (addr, mem_mode);
2987 if (!acost)
2988 acost = 1;
2989 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2992 /* On some targets, it is quite expensive to load symbol to a register,
2993 which makes addresses that contain symbols look much more expensive.
2994 However, the symbol will have to be loaded in any case before the
2995 loop (and quite likely we have it in register already), so it does not
2996 make much sense to penalize them too heavily. So make some final
2997 tweaks for the SYMBOL_PRESENT modes:
2999 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3000 var is cheaper, use this mode with small penalty.
3001 If VAR_PRESENT is true, try whether the mode with
3002 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3003 if this is the case, use it. */
3004 add_c = add_cost (Pmode);
3005 for (i = 0; i < 8; i++)
3007 var_p = i & 1;
3008 off_p = (i >> 1) & 1;
3009 rat_p = (i >> 2) & 1;
3011 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3012 if (var_p)
3013 acost += add_c;
3015 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3016 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3019 if (dump_file && (dump_flags & TDF_DETAILS))
3021 fprintf (dump_file, "Address costs:\n");
3023 for (i = 0; i < 16; i++)
3025 sym_p = i & 1;
3026 var_p = (i >> 1) & 1;
3027 off_p = (i >> 2) & 1;
3028 rat_p = (i >> 3) & 1;
3030 fprintf (dump_file, " ");
3031 if (sym_p)
3032 fprintf (dump_file, "sym + ");
3033 if (var_p)
3034 fprintf (dump_file, "var + ");
3035 if (off_p)
3036 fprintf (dump_file, "cst + ");
3037 if (rat_p)
3038 fprintf (dump_file, "rat * ");
3040 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3041 fprintf (dump_file, "index costs %d\n", acost);
3043 fprintf (dump_file, "\n");
/* Sign-extend OFFSET from the width of Pmode before classifying it.  */
3047 bits = GET_MODE_BITSIZE (Pmode);
3048 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3049 offset &= mask;
3050 if ((offset >> (bits - 1) & 1))
3051 offset |= ~mask;
3052 s_offset = offset;
3054 cost = 0;
3055 offset_p = (s_offset != 0
3056 && min_offset[mem_mode] <= s_offset
3057 && s_offset <= max_offset[mem_mode]);
3058 ratio_p = (ratio != 1
3059 && multiplier_allowed_in_address_p (ratio, mem_mode));
/* Parts that the address cannot absorb must be computed by explicit
   multiply/add instructions.  */
3061 if (ratio != 1 && !ratio_p)
3062 cost += multiply_by_cost (ratio, Pmode);
3064 if (s_offset && !offset_p && !symbol_present)
3065 cost += add_cost (Pmode);
3067 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3068 return cost + acost;
3071 /* Estimates cost of forcing expression EXPR into a variable. */
3073 unsigned
3074 force_expr_to_var_cost (tree expr)
3076 static bool costs_initialized = false;
3077 static unsigned integer_cost;
3078 static unsigned symbol_cost;
3079 static unsigned address_cost;
3080 tree op0, op1;
3081 unsigned cost0, cost1, cost;
3082 enum machine_mode mode;
/* One-time measurement of the baseline costs of materializing an
   integer constant, a symbol address, and symbol + offset, using a
   synthetic static variable as the symbol.  */
3084 if (!costs_initialized)
3086 tree type = build_pointer_type (integer_type_node);
3087 tree var, addr;
3088 rtx x;
3090 var = create_tmp_var_raw (integer_type_node, "test_var");
3091 TREE_STATIC (var) = 1;
3092 x = produce_memory_decl_rtl (var, NULL);
3093 SET_DECL_RTL (var, x);
3095 integer_cost = computation_cost (build_int_cst (integer_type_node,
3096 2000));
3098 addr = build1 (ADDR_EXPR, type, var);
3099 symbol_cost = computation_cost (addr) + 1;
3101 address_cost
3102 = computation_cost (build2 (PLUS_EXPR, type,
3103 addr,
3104 build_int_cst (type, 2000))) + 1;
3105 if (dump_file && (dump_flags & TDF_DETAILS))
3107 fprintf (dump_file, "force_expr_to_var_cost:\n");
3108 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3109 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3110 fprintf (dump_file, " address %d\n", (int) address_cost);
3111 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3112 fprintf (dump_file, "\n");
3115 costs_initialized = true;
3118 STRIP_NOPS (expr);
/* A bare SSA name or variable is already "in a variable".  */
3120 if (SSA_VAR_P (expr))
3121 return 0;
/* Loop-invariant expressions fall into the three pre-measured
   categories: constant, symbol, or symbol + offset.  */
3123 if (TREE_INVARIANT (expr))
3125 if (TREE_CODE (expr) == INTEGER_CST)
3126 return integer_cost;
3128 if (TREE_CODE (expr) == ADDR_EXPR)
3130 tree obj = TREE_OPERAND (expr, 0);
3132 if (TREE_CODE (obj) == VAR_DECL
3133 || TREE_CODE (obj) == PARM_DECL
3134 || TREE_CODE (obj) == RESULT_DECL)
3135 return symbol_cost;
3138 return address_cost;
/* For sums and products, cost the non-trivial operands recursively;
   the cost of the operation itself is added below.  */
3141 switch (TREE_CODE (expr))
3143 case PLUS_EXPR:
3144 case MINUS_EXPR:
3145 case MULT_EXPR:
3146 op0 = TREE_OPERAND (expr, 0);
3147 op1 = TREE_OPERAND (expr, 1);
3148 STRIP_NOPS (op0);
3149 STRIP_NOPS (op1);
3151 if (is_gimple_val (op0))
3152 cost0 = 0;
3153 else
3154 cost0 = force_expr_to_var_cost (op0);
3156 if (is_gimple_val (op1))
3157 cost1 = 0;
3158 else
3159 cost1 = force_expr_to_var_cost (op1);
3161 break;
3163 default:
3164 /* Just an arbitrary value, FIXME. */
3165 return target_spill_cost;
3168 mode = TYPE_MODE (TREE_TYPE (expr));
3169 switch (TREE_CODE (expr))
3171 case PLUS_EXPR:
3172 case MINUS_EXPR:
3173 cost = add_cost (mode);
3174 break;
3176 case MULT_EXPR:
3177 if (cst_and_fits_in_hwi (op0))
3178 cost = multiply_by_cost (int_cst_value (op0), mode);
3179 else if (cst_and_fits_in_hwi (op1))
3180 cost = multiply_by_cost (int_cst_value (op1), mode);
3181 else
3182 return target_spill_cost;
3183 break;
3185 default:
3186 gcc_unreachable ();
3189 cost += cost0;
3190 cost += cost1;
3192 /* Bound the cost by target_spill_cost. The parts of complicated
3193 computations often are either loop invariant or at least can
3194 be shared between several iv uses, so letting this grow without
3195 limits would not give reasonable results. */
3196 return cost < target_spill_cost ? cost : target_spill_cost;
3199 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3200 invariants the computation depends on. */
3202 static unsigned
3203 force_var_cost (struct ivopts_data *data,
3204 tree expr, bitmap *depends_on)
3206 if (depends_on)
3208 fd_ivopts_data = data;
3209 walk_tree (&expr, find_depends, depends_on, NULL);
3212 return force_expr_to_var_cost (expr);
3215 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3216 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3217 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3218 invariants the computation depends on. */
3220 static unsigned
3221 split_address_cost (struct ivopts_data *data,
3222 tree addr, bool *symbol_present, bool *var_present,
3223 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3225 tree core;
3226 HOST_WIDE_INT bitsize;
3227 HOST_WIDE_INT bitpos;
3228 tree toffset;
3229 enum machine_mode mode;
3230 int unsignedp, volatilep;
3232 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3233 &unsignedp, &volatilep, false);
/* A variable offset, a sub-byte bit position, or a non-VAR_DECL base
   cannot be split; charge the cost of forcing the whole address into
   a variable and record its dependencies.  */
3235 if (toffset != 0
3236 || bitpos % BITS_PER_UNIT != 0
3237 || TREE_CODE (core) != VAR_DECL)
3239 *symbol_present = false;
3240 *var_present = true;
3241 fd_ivopts_data = data;
3242 walk_tree (&addr, find_depends, depends_on, NULL);
3243 return target_spill_cost;
/* Otherwise the split itself is free: static/external decls become a
   symbol part, automatic variables a var part.  */
3246 *offset += bitpos / BITS_PER_UNIT;
3247 if (TREE_STATIC (core)
3248 || DECL_EXTERNAL (core))
3250 *symbol_present = true;
3251 *var_present = false;
3252 return 0;
3255 *symbol_present = false;
3256 *var_present = true;
3257 return 0;
3260 /* Estimates cost of expressing difference of addresses E1 - E2 as
3261 var + symbol + offset. The value of offset is added to OFFSET,
3262 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3263 part is missing. DEPENDS_ON is a set of the invariants the computation
3264 depends on. */
3266 static unsigned
3267 ptr_difference_cost (struct ivopts_data *data,
3268 tree e1, tree e2, bool *symbol_present, bool *var_present,
3269 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3271 HOST_WIDE_INT diff = 0;
3272 unsigned cost;
3274 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* If the difference folds to a compile-time constant, it is free.  */
3276 if (ptr_difference_const (e1, e2, &diff))
3278 *offset += diff;
3279 *symbol_present = false;
3280 *var_present = false;
3281 return 0;
/* E1 - 0 reduces to splitting the single address E1.  */
3284 if (e2 == integer_zero_node)
3285 return split_address_cost (data, TREE_OPERAND (e1, 0),
3286 symbol_present, var_present, offset, depends_on);
/* General case: force both addresses into variables and subtract.  */
3288 *symbol_present = false;
3289 *var_present = true;
3291 cost = force_var_cost (data, e1, depends_on);
3292 cost += force_var_cost (data, e2, depends_on);
3293 cost += add_cost (Pmode);
3295 return cost;
3298 /* Estimates cost of expressing difference E1 - E2 as
3299 var + symbol + offset. The value of offset is added to OFFSET,
3300 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3301 part is missing. DEPENDS_ON is a set of the invariants the computation
3302 depends on. */
3304 static unsigned
3305 difference_cost (struct ivopts_data *data,
3306 tree e1, tree e2, bool *symbol_present, bool *var_present,
3307 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3309 unsigned cost;
3310 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3311 unsigned HOST_WIDE_INT off1, off2;
/* Move the constant parts of both operands into OFFSET first.  */
3313 e1 = strip_offset (e1, &off1);
3314 e2 = strip_offset (e2, &off2);
3315 *offset += off1 - off2;
3317 STRIP_NOPS (e1);
3318 STRIP_NOPS (e2);
/* Differences of addresses get the specialized treatment.  */
3320 if (TREE_CODE (e1) == ADDR_EXPR)
3321 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3322 depends_on);
3323 *symbol_present = false;
/* Equal operands cancel: the whole difference is in OFFSET.  */
3325 if (operand_equal_p (e1, e2, 0))
3327 *var_present = false;
3328 return 0;
3330 *var_present = true;
3331 if (integer_zerop (e2))
3332 return force_var_cost (data, e1, depends_on);
/* 0 - E2 costs forcing E2 plus a negation (multiply by -1).  */
3334 if (integer_zerop (e1))
3336 cost = force_var_cost (data, e2, depends_on);
3337 cost += multiply_by_cost (-1, mode);
3339 return cost;
/* General case: force both operands and subtract.  */
3342 cost = force_var_cost (data, e1, depends_on);
3343 cost += force_var_cost (data, e2, depends_on);
3344 cost += add_cost (mode);
3346 return cost;
3349 /* Determines the cost of the computation by that USE is expressed
3350 from induction variable CAND. If ADDRESS_P is true, we just need
3351 to create an address from it, otherwise we want to get it into
3352 register. A set of invariants we depend on is stored in
3353 DEPENDS_ON. AT is the statement at that the value is computed. */
3355 static unsigned
3356 get_computation_cost_at (struct ivopts_data *data,
3357 struct iv_use *use, struct iv_cand *cand,
3358 bool address_p, bitmap *depends_on, tree at)
3360 tree ubase = use->iv->base, ustep = use->iv->step;
3361 tree cbase, cstep;
3362 tree utype = TREE_TYPE (ubase), ctype;
3363 unsigned HOST_WIDE_INT cstepi, offset = 0;
3364 HOST_WIDE_INT ratio, aratio;
3365 bool var_present, symbol_present;
3366 unsigned cost = 0, n_sums;
3367 double_int rat;
3369 *depends_on = NULL;
3371 /* Only consider real candidates. */
3372 if (!cand->iv)
3373 return INFTY;
3375 cbase = cand->iv->base;
3376 cstep = cand->iv->step;
3377 ctype = TREE_TYPE (cbase);
3379 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3381 /* We do not have a precision to express the values of use. */
3382 return INFTY;
3385 if (address_p)
3387 /* Do not try to express address of an object with computation based
3388 on address of a different object. This may cause problems in rtl
3389 level alias analysis (that does not expect this to be happening,
3390 as this is illegal in C), and would be unlikely to be useful
3391 anyway. */
3392 if (use->iv->base_object
3393 && cand->iv->base_object
3394 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3395 return INFTY;
3398 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3400 /* TODO -- add direct handling of this case. */
3401 goto fallback;
3404 /* CSTEPI is removed from the offset in case statement is after the
3405 increment. If the step is not constant, we use zero instead.
3406 This is a bit imprecise (there is the extra addition), but
3407 redundancy elimination is likely to transform the code so that
3408 it uses value of the variable before increment anyway,
3409 so it is not that much unrealistic. */
3410 if (cst_and_fits_in_hwi (cstep))
3411 cstepi = int_cst_value (cstep);
3412 else
3413 cstepi = 0;
3415 if (!constant_multiple_of (ustep, cstep, &rat))
3416 return INFTY;
/* The cost model below only handles ratios that fit in a signed
   HOST_WIDE_INT.  */
3418 if (double_int_fits_in_shwi_p (rat))
3419 ratio = double_int_to_shwi (rat);
3420 else
3421 return INFTY;
3423 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3424 or ratio == 1, it is better to handle this like
3426 ubase - ratio * cbase + ratio * var
3428 (also holds in the case ratio == -1, TODO. */
3430 if (cst_and_fits_in_hwi (cbase))
3432 offset = - ratio * int_cst_value (cbase);
3433 cost += difference_cost (data,
3434 ubase, integer_zero_node,
3435 &symbol_present, &var_present, &offset,
3436 depends_on);
3438 else if (ratio == 1)
3440 cost += difference_cost (data,
3441 ubase, cbase,
3442 &symbol_present, &var_present, &offset,
3443 depends_on);
3445 else
3447 cost += force_var_cost (data, cbase, depends_on);
3448 cost += add_cost (TYPE_MODE (ctype));
3449 cost += difference_cost (data,
3450 ubase, integer_zero_node,
3451 &symbol_present, &var_present, &offset,
3452 depends_on);
3455 /* If we are after the increment, the value of the candidate is higher by
3456 one iteration. */
3457 if (stmt_after_increment (data->current_loop, cand, at))
3458 offset -= ratio * cstepi;
3460 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3461 (symbol/var/const parts may be omitted). If we are looking for an address,
3462 find the cost of addressing this. */
3463 if (address_p)
3464 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3465 TYPE_MODE (TREE_TYPE (*use->op_p)));
3467 /* Otherwise estimate the costs for computing the expression. */
3468 aratio = ratio > 0 ? ratio : -ratio;
3469 if (!symbol_present && !var_present && !offset)
3471 if (ratio != 1)
3472 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3474 return cost;
3477 if (aratio != 1)
3478 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
/* Count the additions needed to combine the remaining parts; symbol +
   offset folds at compile time, so it needs no extra addition.  */
3480 n_sums = 1;
3481 if (var_present
3482 /* Symbol + offset should be compile-time computable. */
3483 && (symbol_present || offset))
3484 n_sums++;
3486 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3488 fallback:
3490 /* Just get the expression, expand it and measure the cost. */
3491 tree comp = get_computation_at (data->current_loop, use, cand, at);
3493 if (!comp)
3494 return INFTY;
3496 if (address_p)
3497 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3499 return computation_cost (comp);
3503 /* Determines the cost of the computation by that USE is expressed
3504 from induction variable CAND. If ADDRESS_P is true, we just need
3505 to create an address from it, otherwise we want to get it into
3506 register. A set of invariants we depend on is stored in
3507 DEPENDS_ON. */
3509 static unsigned
3510 get_computation_cost (struct ivopts_data *data,
3511 struct iv_use *use, struct iv_cand *cand,
3512 bool address_p, bitmap *depends_on)
3514 return get_computation_cost_at (data,
3515 use, cand, address_p, depends_on, use->stmt);
3518 /* Determines cost of basing replacement of USE on CAND in a generic
3519 expression. */
3521 static bool
3522 determine_use_iv_cost_generic (struct ivopts_data *data,
3523 struct iv_use *use, struct iv_cand *cand)
3525 bitmap depends_on;
3526 unsigned cost;
3528 /* The simple case first -- if we need to express value of the preserved
3529 original biv, the cost is 0. This also prevents us from counting the
3530 cost of increment twice -- once at this use and once in the cost of
3531 the candidate. */
3532 if (cand->pos == IP_ORIGINAL
3533 && cand->incremented_at == use->stmt)
3535 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3536 return true;
3539 cost = get_computation_cost (data, use, cand, false, &depends_on);
3540 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3542 return cost != INFTY;
3545 /* Determines cost of basing replacement of USE on CAND in an address. */
3547 static bool
3548 determine_use_iv_cost_address (struct ivopts_data *data,
3549 struct iv_use *use, struct iv_cand *cand)
3551 bitmap depends_on;
3552 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3554 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3556 return cost != INFTY;
3559 /* Computes value of candidate CAND at position AT in iteration NITER, and
3560 stores it to VAL. */
3562 static void
3563 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3564 aff_tree *val)
3566 aff_tree step, delta, nit;
3567 struct iv *iv = cand->iv;
3568 tree type = TREE_TYPE (iv->base);
3570 tree_to_aff_combination (iv->step, type, &step);
3571 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3572 aff_combination_convert (&nit, type);
3573 aff_combination_mult (&nit, &step, &delta);
3574 if (stmt_after_increment (loop, cand, at))
3575 aff_combination_add (&delta, &step);
3577 tree_to_aff_combination (iv->base, type, val);
3578 aff_combination_add (val, &delta);
3581 /* Returns period of induction variable iv. */
3583 static tree
3584 iv_period (struct iv *iv)
3586 tree step = iv->step, period, type;
3587 tree pow2div;
3589 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3591 /* Period of the iv is gcd (step, type range). Since type range is power
3592 of two, it suffices to determine the maximum power of two that divides
3593 step. */
3594 pow2div = num_ending_zeros (step);
3595 type = unsigned_type_for (TREE_TYPE (step));
3597 period = build_low_bits_mask (type,
3598 (TYPE_PRECISION (type)
3599 - tree_low_cst (pow2div, 1)));
3601 return period;
3604 /* Returns the comparison operator used when eliminating the iv USE. */
3606 static enum tree_code
3607 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3609 struct loop *loop = data->current_loop;
3610 basic_block ex_bb;
3611 edge exit;
3613 ex_bb = bb_for_stmt (use->stmt);
3614 exit = EDGE_SUCC (ex_bb, 0);
3615 if (flow_bb_inside_loop_p (loop, exit->dest))
3616 exit = EDGE_SUCC (ex_bb, 1);
3618 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3621 /* Check whether it is possible to express the condition in USE by comparison
3622 of candidate CAND. If so, store the value compared with to BOUND. */
3624 static bool
3625 may_eliminate_iv (struct ivopts_data *data,
3626 struct iv_use *use, struct iv_cand *cand, tree *bound)
3628 basic_block ex_bb;
3629 edge exit;
3630 tree nit, nit_type;
3631 tree wider_type, period, per_type;
3632 struct loop *loop = data->current_loop;
3633 aff_tree bnd;
3635 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3636 return false;
3638 /* For now works only for exits that dominate the loop latch. TODO -- extend
3639 for other conditions inside loop body. */
3640 ex_bb = bb_for_stmt (use->stmt);
3641 if (use->stmt != last_stmt (ex_bb)
3642 || TREE_CODE (use->stmt) != COND_EXPR)
3643 return false;
3644 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3645 return false;
/* Find the successor edge that leaves the loop; if neither does, the
   condition is not a loop exit and cannot be eliminated.  */
3647 exit = EDGE_SUCC (ex_bb, 0);
3648 if (flow_bb_inside_loop_p (loop, exit->dest))
3649 exit = EDGE_SUCC (ex_bb, 1);
3650 if (flow_bb_inside_loop_p (loop, exit->dest))
3651 return false;
3653 nit = niter_for_exit (data, exit);
3654 if (!nit)
3655 return false;
3657 nit_type = TREE_TYPE (nit);
3659 /* Determine whether we may use the variable to test whether niter iterations
3660 elapsed. This is the case iff the period of the induction variable is
3661 greater than the number of iterations. */
3662 period = iv_period (cand->iv);
3663 if (!period)
3664 return false;
3665 per_type = TREE_TYPE (period);
/* Compare period and niter in the wider of their two types.  */
3667 wider_type = TREE_TYPE (period);
3668 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3669 wider_type = per_type;
3670 else
3671 wider_type = nit_type;
3673 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3674 fold_convert (wider_type, period),
3675 fold_convert (wider_type, nit))))
3676 return false;
/* BOUND is the value of CAND after NIT iterations, i.e. the value the
   eliminated condition compares against.  */
3678 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3679 *bound = aff_combination_to_tree (&bnd);
3680 return true;
/* Determines cost of basing replacement of USE on CAND in a condition.
   Records the cheaper of "eliminate the iv entirely" and "express the iv
   in terms of CAND" in the use/candidate cost map.  Returns false iff the
   use cannot be based on CAND at all (infinite cost).  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  unsigned elim_cost, express_cost, cost;
  bool ok;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    elim_cost = force_var_cost (data, bound, &depends_on_elim);
  else
    elim_cost = INFTY;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
  gcc_assert (ok);

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express);
  /* find_depends reads fd_ivopts_data; set it up before walking.  */
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach.  The winning dependence bitmap is handed
     over to set_use_iv_cost (hence NULLed here so it is not freed below);
     the losing one is freed at the end.  */
  if (elim_cost < express_cost)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      /* BOUND is only meaningful for the elimination strategy.  */
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return cost != INFTY;
}
3743 /* Determines cost of basing replacement of USE on CAND. Returns false
3744 if USE cannot be based on CAND. */
3746 static bool
3747 determine_use_iv_cost (struct ivopts_data *data,
3748 struct iv_use *use, struct iv_cand *cand)
3750 switch (use->type)
3752 case USE_NONLINEAR_EXPR:
3753 return determine_use_iv_cost_generic (data, use, cand);
3755 case USE_ADDRESS:
3756 return determine_use_iv_cost_address (data, use, cand);
3758 case USE_COMPARE:
3759 return determine_use_iv_cost_condition (data, use, cand);
3761 default:
3762 gcc_unreachable ();
/* Determines costs of basing the use of the iv on an iv candidate.
   Fills in the per-use cost maps for either all candidates or only the
   related ones, and prunes related candidates with infinite cost.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  /* Scratch bitmap, reused (cleared) per use.  */
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Small problem: cost every use against every candidate.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Large problem: restrict to the candidates recorded as
	     related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip empty map slots and infeasible pairs.  */
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
3841 /* Determines cost of the candidate CAND. */
3843 static void
3844 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3846 unsigned cost_base, cost_step;
3847 tree base;
3849 if (!cand->iv)
3851 cand->cost = 0;
3852 return;
3855 /* There are two costs associated with the candidate -- its increment
3856 and its initialization. The second is almost negligible for any loop
3857 that rolls enough, so we take it just very little into account. */
3859 base = cand->iv->base;
3860 cost_base = force_var_cost (data, base, NULL);
3861 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3863 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3865 /* Prefer the original iv unless we may gain something by replacing it;
3866 this is not really relevant for artificial ivs created by other
3867 passes. */
3868 if (cand->pos == IP_ORIGINAL
3869 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3870 cand->cost--;
3872 /* Prefer not to insert statements into latch unless there are some
3873 already (so that we do not create unnecessary jumps). */
3874 if (cand->pos == IP_END
3875 && empty_block_p (ip_end_pos (data->current_loop)))
3876 cand->cost++;
3879 /* Determines costs of computation of the candidates. */
3881 static void
3882 determine_iv_costs (struct ivopts_data *data)
3884 unsigned i;
3886 if (dump_file && (dump_flags & TDF_DETAILS))
3888 fprintf (dump_file, "Candidate costs:\n");
3889 fprintf (dump_file, " cand\tcost\n");
3892 for (i = 0; i < n_iv_cands (data); i++)
3894 struct iv_cand *cand = iv_cand (data, i);
3896 determine_iv_cost (data, cand);
3898 if (dump_file && (dump_flags & TDF_DETAILS))
3899 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3903 fprintf (dump_file, "\n");
3906 /* Calculates cost for having SIZE induction variables. */
3908 static unsigned
3909 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3911 /* We add size to the cost, so that we prefer eliminating ivs
3912 if possible. */
3913 return size + estimate_reg_pressure_cost (size, data->regs_used);
/* For each size of the induction variable set determine the penalty.
   Estimates the number of registers the loop already uses (DATA->regs_used),
   which feeds the register-pressure part of the global cost.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count loop header phis that are real (non-virtual) registers and are
     not induction variables -- ivs are accounted for separately.  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Add loop invariants used in non-linear expressions; each occupies
     a register for the duration of the loop.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
3991 /* Returns true if A is a cheaper cost pair than B. */
3993 static bool
3994 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3996 if (!a)
3997 return false;
3999 if (!b)
4000 return true;
4002 if (a->cost < b->cost)
4003 return true;
4005 if (a->cost > b->cost)
4006 return false;
4008 /* In case the costs are the same, prefer the cheaper candidate. */
4009 if (a->cand->cost < b->cand->cost)
4010 return true;
4012 return false;
4015 /* Computes the cost field of IVS structure. */
4017 static void
4018 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4020 unsigned cost = 0;
4022 cost += ivs->cand_use_cost;
4023 cost += ivs->cand_cost;
4024 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4026 ivs->cost = cost;
4029 /* Remove invariants in set INVS to set IVS. */
4031 static void
4032 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4034 bitmap_iterator bi;
4035 unsigned iid;
4037 if (!invs)
4038 return;
4040 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4042 ivs->n_invariant_uses[iid]--;
4043 if (ivs->n_invariant_uses[iid] == 0)
4044 ivs->n_regs--;
/* Set USE not to be expressed by any candidate in IVS.  Undoes all the
   bookkeeping done by iv_ca_set_cp: use/candidate counts, register counts,
   invariant references, and the total cost.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The use is no longer covered.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      /* Last use of this candidate -- drop it from the set entirely.  */
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
4084 /* Add invariants in set INVS to set IVS. */
4086 static void
4087 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4089 bitmap_iterator bi;
4090 unsigned iid;
4092 if (!invs)
4093 return;
4095 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4097 ivs->n_invariant_uses[iid]++;
4098 if (ivs->n_invariant_uses[iid] == 1)
4099 ivs->n_regs++;
/* Set cost pair for USE in set IVS to CP.  CP may be NULL, in which case
   the use simply stops being expressed.  Keeps all the derived counters
   (use/candidate counts, registers, invariants, total cost) in sync; this
   is the exact mirror of iv_ca_set_no_cp.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  /* Nothing to do if the assignment does not change.  */
  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Detach the old assignment first (also recounts the cost).  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  /* First use of this candidate -- add it to the set.  */
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
4142 /* Extend set IVS by expressing USE by some of the candidates in it
4143 if possible. */
4145 static void
4146 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4147 struct iv_use *use)
4149 struct cost_pair *best_cp = NULL, *cp;
4150 bitmap_iterator bi;
4151 unsigned i;
4153 gcc_assert (ivs->upto >= use->id);
4155 if (ivs->upto == use->id)
4157 ivs->upto++;
4158 ivs->bad_uses++;
4161 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4163 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4165 if (cheaper_cost_pair (cp, best_cp))
4166 best_cp = cp;
4169 iv_ca_set_cp (data, ivs, use, best_cp);
4172 /* Get cost for assignment IVS. */
4174 static unsigned
4175 iv_ca_cost (struct iv_ca *ivs)
4177 return (ivs->bad_uses ? INFTY : ivs->cost);
4180 /* Returns true if all dependences of CP are among invariants in IVS. */
4182 static bool
4183 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4185 unsigned i;
4186 bitmap_iterator bi;
4188 if (!cp->depends_on)
4189 return true;
4191 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4193 if (ivs->n_invariant_uses[i] == 0)
4194 return false;
4197 return true;
4200 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4201 it before NEXT_CHANGE. */
4203 static struct iv_ca_delta *
4204 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4205 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4207 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4209 change->use = use;
4210 change->old_cp = old_cp;
4211 change->new_cp = new_cp;
4212 change->next_change = next_change;
4214 return change;
4217 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4218 are rewritten. */
4220 static struct iv_ca_delta *
4221 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4223 struct iv_ca_delta *last;
4225 if (!l2)
4226 return l1;
4228 if (!l1)
4229 return l2;
4231 for (last = l1; last->next_change; last = last->next_change)
4232 continue;
4233 last->next_change = l2;
4235 return l1;
4238 /* Returns candidate by that USE is expressed in IVS. */
4240 static struct cost_pair *
4241 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4243 return ivs->cand_for_use[use->id];
4246 /* Reverse the list of changes DELTA, forming the inverse to it. */
4248 static struct iv_ca_delta *
4249 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4251 struct iv_ca_delta *act, *next, *prev = NULL;
4252 struct cost_pair *tmp;
4254 for (act = delta; act; act = next)
4256 next = act->next_change;
4257 act->next_change = prev;
4258 prev = act;
4260 tmp = act->old_cp;
4261 act->old_cp = act->new_cp;
4262 act->new_cp = tmp;
4265 return prev;
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  Reversal is done by reversing the list in place
   (which also swaps old/new in each node) and reversing it back at the
   end, leaving DELTA as it was on entry.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      /* Each change must apply to the state it was recorded against.  */
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  /* Restore the original order (and old/new orientation) of DELTA.  */
  if (!forward)
    iv_ca_delta_reverse (delta);
}
4293 /* Returns true if CAND is used in IVS. */
4295 static bool
4296 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4298 return ivs->n_cand_uses[cand->id] > 0;
4301 /* Returns number of induction variable candidates in the set IVS. */
4303 static unsigned
4304 iv_ca_n_cands (struct iv_ca *ivs)
4306 return ivs->n_cands;
4309 /* Free the list of changes DELTA. */
4311 static void
4312 iv_ca_delta_free (struct iv_ca_delta **delta)
4314 struct iv_ca_delta *act, *next;
4316 for (act = *delta; act; act = next)
4318 next = act->next_change;
4319 free (act);
4322 *delta = NULL;
4325 /* Allocates new iv candidates assignment. */
4327 static struct iv_ca *
4328 iv_ca_new (struct ivopts_data *data)
4330 struct iv_ca *nw = XNEW (struct iv_ca);
4332 nw->upto = 0;
4333 nw->bad_uses = 0;
4334 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4335 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4336 nw->cands = BITMAP_ALLOC (NULL);
4337 nw->n_cands = 0;
4338 nw->n_regs = 0;
4339 nw->cand_use_cost = 0;
4340 nw->cand_cost = 0;
4341 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4342 nw->cost = 0;
4344 return nw;
4347 /* Free memory occupied by the set IVS. */
4349 static void
4350 iv_ca_free (struct iv_ca **ivs)
4352 free ((*ivs)->cand_for_use);
4353 free ((*ivs)->n_cand_uses);
4354 BITMAP_FREE ((*ivs)->cands);
4355 free ((*ivs)->n_invariant_uses);
4356 free (*ivs);
4357 *ivs = NULL;
4360 /* Dumps IVS to FILE. */
4362 static void
4363 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4365 const char *pref = " invariants ";
4366 unsigned i;
4368 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4369 bitmap_print (file, ivs->cands, " candidates ","\n");
4371 for (i = 1; i <= data->max_inv_id; i++)
4372 if (ivs->n_invariant_uses[i])
4374 fprintf (file, "%s%d", pref, i);
4375 pref = ", ";
4377 fprintf (file, "\n");
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  IVS itself is left unchanged: the
   delta is committed only to measure the cost, then rolled back.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already expressed by CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* All invariants the pair depends on must already be in the set.  */
      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      /* Only record changes that actually lower the use cost.  */
      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the delta just to measure cost and size, then undo it.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  Every use currently
   expressed by CAND must be re-expressible by some other candidate in
   the set, otherwise INFTY is returned.  IVS itself is left unchanged.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      /* Find the cheapest replacement for this use among the remaining
	 candidates of the set.  */
      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  /* Only candidates related to the use have cost map entries;
	     intersect with the set's candidates.  */
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No replacement -- CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then restore IVS.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  Greedily removes the candidate whose removal helps
   most, then recurses; the returned DELTA chains all removals.  IVS itself
   is left unchanged.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal improves the cost -- done.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The best delta is
     committed during the recursion so further removals are measured
     against the narrowed set, then rolled back and joined with the
     recursive delta.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  Returns false iff no candidate can express the
   use at finite cost.  On success the best delta found is committed
   into IVS.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* Baseline: express the use with whatever is already in the set.  */
  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      /* Remember the baseline choice as a delta and detach it so the
	 trials below start from a clean state.  */
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if it fails, try the specific ones.
     Rationale -- in loops with many variables the best choice often is to use
     just one generic biv.  If we added here many ivs specific to the uses,
     the optimization algorithm later would be likely to get stuck in a local
     minimum, thus causing us to create too many ivs.  The approach from
     few ivs to more seems more likely to be successful -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Tentatively assign CAND to the use, measure the extended set,
	 then detach again.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      /* Important candidates failed; fall back to any candidate that has
	 a cost map entry for this use.  */
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Apply the winner permanently.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}
4659 /* Finds an initial assignment of candidates to uses. */
4661 static struct iv_ca *
4662 get_initial_solution (struct ivopts_data *data)
4664 struct iv_ca *ivs = iv_ca_new (data);
4665 unsigned i;
4667 for (i = 0; i < n_iv_uses (data); i++)
4668 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4670 iv_ca_free (&ivs);
4671 return NULL;
4674 return ivs;
/* Tries to improve set of induction variables IVS.  Performs one greedy
   improvement step (add a candidate, possibly followed by pruning, or
   prune alone) and commits it.  Returns false when no improvement is
   possible.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  /* Prune against the extended set, then combine both deltas;
	     IVS is restored in between.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.
   Returns NULL if no feasible assignment exists; otherwise returns the
   final set with each use's selected candidate recorded in USE->selected.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate local improvement steps until a fixed point is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the chosen candidate on each use for the rewrite phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  For IP_ORIGINAL
   candidates nothing is created: the original biv is marked as preserved
   and its increment rewritten to use var_before.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Pseudocandidates have no iv to materialize.  */
  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Increment goes just before the exit condition.  */
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* Increment goes at the end of the latch block.  */
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
4825 /* Creates new induction variables described in SET. */
4827 static void
4828 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4830 unsigned i;
4831 struct iv_cand *cand;
4832 bitmap_iterator bi;
4834 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4836 cand = iv_cand (data, i);
4837 create_new_iv (data, cand);
4841 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4842 is true, remove also the ssa name defined by the statement. */
4844 static void
4845 remove_statement (tree stmt, bool including_defined_name)
4847 if (TREE_CODE (stmt) == PHI_NODE)
4849 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4851 else
4853 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4855 bsi_remove (&bsi, true);
4856 release_defs (stmt);
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step means the iv is decremented.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  /* Extract the non-iv addend of "var_before +/- x" (x only on
	     the left for the commutative PLUS_EXPR).  */
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      /* Safe to keep unchanged if the addend is a constant or the
	 step itself.  */
      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* New statements for a phi result go after the labels of its
	 block.  */
      bsi = bsi_after_labels (bb_for_stmt (use->stmt));
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node with an explicit assignment.  */
      ass = build_gimple_modify_stmt (tgt, op);
      bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
}
4967 /* Replaces ssa name in index IDX by its basic variable. Callback for
4968 for_each_index. */
4970 static bool
4971 idx_remove_ssa_names (tree base, tree *idx,
4972 void *data ATTRIBUTE_UNUSED)
4974 tree *op;
4976 if (TREE_CODE (*idx) == SSA_NAME)
4977 *idx = SSA_NAME_VAR (*idx);
4979 if (TREE_CODE (base) == ARRAY_REF)
4981 op = &TREE_OPERAND (base, 2);
4982 if (*op
4983 && TREE_CODE (*op) == SSA_NAME)
4984 *op = SSA_NAME_VAR (*op);
4985 op = &TREE_OPERAND (base, 3);
4986 if (*op
4987 && TREE_CODE (*op) == SSA_NAME)
4988 *op = SSA_NAME_VAR (*op);
4991 return true;
4994 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4996 static tree
4997 unshare_and_remove_ssa_names (tree ref)
4999 ref = unshare_expr (ref);
5000 for_each_index (&ref, idx_remove_ssa_names, NULL);
5002 return ref;
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways how this information may be stored and what precisely is
   its semantics depending on the type of the reference, but there always is
   somewhere hidden one _DECL node that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts this single useful piece of information.  ORIG is the
   original (pre-rewrite) form of the reference, used to detect accesses
   covered by structure-field subvariables.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  /* Walk down the handled components of ORIG looking for a base whose
     subvariables describe the access.  */
  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      /* NOTE(review): REF is the function parameter and was already
	 dereferenced by get_base_address above, so this condition is
	 always true and the loop breaks on its first iteration.  Looks
	 like "if (aref)" may have been intended — confirm against the
	 upstream history before changing; it affects alias tags.  */
      if (ref)
	break;
    }

  /* If the base has subvariables, return the (unshared) component access
     itself; the operand scanner will pick the right subvars from it.  */
  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name memory
	 tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      /* A dereferenced pointer must have a symbol memory tag.  */
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      /* A plain decl: prefer its symbol memory tag if it has one,
	 otherwise the decl itself determines the virtual operands.  */
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}
5065 /* Copies the reference information from OLD_REF to NEW_REF. */
5067 static void
5068 copy_ref_info (tree new_ref, tree old_ref)
5070 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5071 copy_mem_ref_info (new_ref, old_ref);
5072 else
5074 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5075 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5079 /* Rewrites USE (address that is an iv) using candidate CAND. */
5081 static void
5082 rewrite_use_address (struct ivopts_data *data,
5083 struct iv_use *use, struct iv_cand *cand)
5085 aff_tree aff;
5086 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5087 tree ref;
5088 bool ok;
5090 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5091 gcc_assert (ok);
5092 unshare_aff_combination (&aff);
5094 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5095 copy_ref_info (ref, *use->op_p);
5096 *use->op_p = ref;
5099 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5100 candidate CAND. */
5102 static void
5103 rewrite_use_compare (struct ivopts_data *data,
5104 struct iv_use *use, struct iv_cand *cand)
5106 tree comp, *var_p, op, bound;
5107 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5108 enum tree_code compare;
5109 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5110 bool ok;
5112 bound = cp->value;
5113 if (bound)
5115 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5116 tree var_type = TREE_TYPE (var);
5118 compare = iv_elimination_compare (data, use);
5119 bound = unshare_expr (fold_convert (var_type, bound));
5120 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);
5122 *use->op_p = build2 (compare, boolean_type_node, var, op);
5123 return;
5126 /* The induction variable elimination failed; just express the original
5127 giv. */
5128 comp = get_computation (data->current_loop, use, cand);
5129 gcc_assert (comp != NULL_TREE);
5131 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5132 gcc_assert (ok);
5134 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
5137 /* Rewrites USE using candidate CAND. */
5139 static void
5140 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5142 push_stmt_changes (&use->stmt);
5144 switch (use->type)
5146 case USE_NONLINEAR_EXPR:
5147 rewrite_use_nonlinear_expr (data, use, cand);
5148 break;
5150 case USE_ADDRESS:
5151 rewrite_use_address (data, use, cand);
5152 break;
5154 case USE_COMPARE:
5155 rewrite_use_compare (data, use, cand);
5156 break;
5158 default:
5159 gcc_unreachable ();
5162 pop_stmt_changes (&use->stmt);
5165 /* Rewrite the uses using the selected induction variables. */
5167 static void
5168 rewrite_uses (struct ivopts_data *data)
5170 unsigned i;
5171 struct iv_cand *cand;
5172 struct iv_use *use;
5174 for (i = 0; i < n_iv_uses (data); i++)
5176 use = iv_use (data, i);
5177 cand = use->selected;
5178 gcc_assert (cand);
5180 rewrite_use (data, use, cand);
5184 /* Removes the ivs that are not used after rewriting. */
5186 static void
5187 remove_unused_ivs (struct ivopts_data *data)
5189 unsigned j;
5190 bitmap_iterator bi;
5192 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5194 struct version_info *info;
5196 info = ver_info (data, j);
5197 if (info->iv
5198 && !integer_zerop (info->iv->step)
5199 && !info->inv_id
5200 && !info->iv->have_use_for
5201 && !info->preserve_biv)
5202 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
/* Frees data allocated by the optimization of a single loop, resetting
   DATA so the next loop starts from a clean state.  The per-pass
   structures (the bitmaps, the vectors themselves, version_info) are
   kept and only emptied; they are released in
   tree_ssa_iv_optimize_finalize.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  /* Reset the bookkeeping of every ssa name marked relevant.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  /* Release every recorded use together with its cost map; the uses
     must be freed before the vector is truncated.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  /* Likewise for the candidates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  /* Grow the version_info array if new ssa names were created; doubling
     amortizes the reallocation over subsequent loops.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  /* Forget the DECL_RTLs we temporarily assigned for cost estimation.  */
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
5277 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5278 loop tree. */
5280 static void
5281 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5283 free_loop_data (data);
5284 free (data->version_info);
5285 BITMAP_FREE (data->relevant);
5286 BITMAP_FREE (data->important_candidates);
5288 VEC_free (tree, heap, decl_rtl_to_reset);
5289 VEC_free (iv_use_p, heap, data->iv_uses);
5290 VEC_free (iv_cand_p, heap, data->iv_candidates);
/* Optimizes the LOOP.  Runs the full ivopts pipeline on it: find ivs
   and interesting uses, gather candidates, compute costs, choose the
   optimal candidate set, then rewrite the uses and clean up.  Returns
   true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  /* free_loop_data must have cleared the niter cache of the previous
     loop.  */
  gcc_assert (!data->niters);
  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  Give up on loops with too many
     uses to keep the candidate selection tractable.  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Per-loop cleanup runs on every path, including the early exits.  */
  free_loop_data (data);

  return changed;
}
5366 /* Main entry point. Optimizes induction variables in loops. */
5368 void
5369 tree_ssa_iv_optimize (void)
5371 struct loop *loop;
5372 struct ivopts_data data;
5373 loop_iterator li;
5375 tree_ssa_iv_optimize_init (&data);
5377 /* Optimize the loops starting with the innermost ones. */
5378 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5380 if (dump_file && (dump_flags & TDF_DETAILS))
5381 flow_loop_dump (loop, dump_file, NULL, 1);
5383 tree_ssa_iv_optimize_loop (&data, loop);
5386 tree_ssa_iv_optimize_finalize (&data);