re PR driver/31353 (gcc --help=target gives a linker error.)
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob6e952af1cd4067f54298fa0b244c365b4f545a62
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this constant guess.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to that the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in that it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* The data used by the induction variable optimizations.  */

/* Heap-allocated vectors of pointers to uses and candidates.  */
typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
318 /* Number of uses recorded in DATA. */
320 static inline unsigned
321 n_iv_uses (struct ivopts_data *data)
323 return VEC_length (iv_use_p, data->iv_uses);
326 /* Ith use recorded in DATA. */
328 static inline struct iv_use *
329 iv_use (struct ivopts_data *data, unsigned i)
331 return VEC_index (iv_use_p, data->iv_uses, i);
334 /* Number of candidates recorded in DATA. */
336 static inline unsigned
337 n_iv_cands (struct ivopts_data *data)
339 return VEC_length (iv_cand_p, data->iv_candidates);
342 /* Ith candidate recorded in DATA. */
344 static inline struct iv_cand *
345 iv_cand (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_cand_p, data->iv_candidates, i);
350 /* The single loop exit if it dominates the latch, NULL otherwise. */
352 edge
353 single_dom_exit (struct loop *loop)
355 edge exit = single_exit (loop);
357 if (!exit)
358 return NULL;
360 if (!just_once_each_iteration_p (loop, exit->src))
361 return NULL;
363 return exit;
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  /* A nonzero step means a genuine iv; otherwise the "iv" is a loop
     invariant recorded with zero step.  */
  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
455 /* Dumps information about the uses to FILE. */
457 extern void dump_uses (FILE *, struct ivopts_data *);
458 void
459 dump_uses (FILE *file, struct ivopts_data *data)
461 unsigned i;
462 struct iv_use *use;
464 for (i = 0; i < n_iv_uses (data); i++)
466 use = iv_use (data, i);
468 dump_use (file, use);
469 fprintf (file, "\n");
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  /* A NULL iv marks the pseudocandidate for final value replacement.  */
  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
514 /* Returns the info for ssa version VER. */
516 static inline struct version_info *
517 ver_info (struct ivopts_data *data, unsigned ver)
519 return data->version_info + ver;
522 /* Returns the info for ssa name NAME. */
524 static inline struct version_info *
525 name_info (struct ivopts_data *data, tree name)
527 return ver_info (data, SSA_NAME_VERSION (name));
530 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
531 emitted in LOOP. */
533 static bool
534 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
536 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
538 gcc_assert (bb);
540 if (sbb == loop->latch)
541 return true;
543 if (sbb != bb)
544 return false;
546 return stmt == last_stmt (bb);
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  NOTE: the loop has no
     termination test of its own -- it relies on both STMT and
     CAND->incremented_at being in STMT_BB (established above), so one
     of the two comparisons is guaranteed to fire.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}
576 /* Returns true if STMT if after the place where the induction variable
577 CAND is incremented in LOOP. */
579 static bool
580 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
582 switch (cand->pos)
584 case IP_END:
585 return false;
587 case IP_NORMAL:
588 return stmt_after_ip_normal_pos (loop, stmt);
590 case IP_ORIGINAL:
591 return stmt_after_ip_original_pos (cand, stmt);
593 default:
594 gcc_unreachable ();
598 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
600 static bool
601 abnormal_ssa_name_p (tree exp)
603 if (!exp)
604 return false;
606 if (TREE_CODE (exp) != SSA_NAME)
607 return false;
609 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
612 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
613 abnormal phi node. Callback for for_each_index. */
615 static bool
616 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
617 void *data ATTRIBUTE_UNUSED)
619 if (TREE_CODE (base) == ARRAY_REF)
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
622 return false;
623 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
624 return false;
627 return !abnormal_ssa_name_p (*index);
630 /* Returns true if EXPR contains a ssa name that occurs in an
631 abnormal phi node. */
633 bool
634 contains_abnormal_ssa_name_p (tree expr)
636 enum tree_code code;
637 enum tree_code_class class;
639 if (!expr)
640 return false;
642 code = TREE_CODE (expr);
643 class = TREE_CODE_CLASS (code);
645 if (code == SSA_NAME)
646 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
648 if (code == INTEGER_CST
649 || is_gimple_min_invariant (expr))
650 return false;
652 if (code == ADDR_EXPR)
653 return !for_each_index (&TREE_OPERAND (expr, 0),
654 idx_contains_abnormal_ssa_name_p,
655 NULL);
657 switch (class)
659 case tcc_binary:
660 case tcc_comparison:
661 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
662 return true;
664 /* Fallthru. */
665 case tcc_unary:
666 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
667 return true;
669 break;
671 default:
672 gcc_unreachable ();
675 return false;
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  /* The niter cache is created lazily on first query; a NULL slot means
     the answer for EXIT has not been computed yet.  */
  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      /* Cache the result (possibly NULL_TREE) so the analysis runs at
         most once per exit.  */
      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = *slot;

  return niter;
}
719 /* Returns tree describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
721 goes wrong. */
723 static tree
724 niter_for_single_dom_exit (struct ivopts_data *data)
726 edge exit = single_dom_exit (data->current_loop);
728 if (!exit)
729 return NULL;
731 return niter_for_exit (data, exit);
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  /* Twice the current number of ssa names -- presumably headroom for
     names created during the pass (TODO confirm the growth policy).  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant pointer points to no object.  */
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      /* If the base cannot be found, conservatively use the whole
         address expression as the object.  */
      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* For pointer arithmetic, combine the base objects of both
         operands.  */
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
808 /* Allocates an induction variable with given initial value BASE and step STEP
809 for loop LOOP. */
811 static struct iv *
812 alloc_iv (tree base, tree step)
814 struct iv *iv = XCNEW (struct iv);
815 gcc_assert (step != NULL_TREE);
817 iv->base = base;
818 iv->base_object = determine_base_object (base);
819 iv->step = step;
820 iv->biv_p = false;
821 iv->have_use_for = false;
822 iv->use_id = 0;
823 iv->ssa_name = NULL_TREE;
825 return iv;
828 /* Sets STEP and BASE for induction variable IV. */
830 static void
831 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
833 struct version_info *info = name_info (data, iv);
835 gcc_assert (!info->iv);
837 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
838 info->iv = alloc_iv (base, step);
839 info->iv->ssa_name = iv;
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  /* Only pointer and integral types can form induction variables.  */
  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      /* Names defined outside the current loop are loop invariants;
         record them lazily as ivs with zero step.  */
      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
866 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
867 not define a simple affine biv with nonzero step. */
869 static tree
870 determine_biv_step (tree phi)
872 struct loop *loop = bb_for_stmt (phi)->loop_father;
873 tree name = PHI_RESULT (phi);
874 affine_iv iv;
876 if (!is_gimple_reg (name))
877 return NULL_TREE;
879 if (!simple_iv (loop, phi, name, &iv, true))
880 return NULL_TREE;
882 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
885 /* Finds basic ivs. */
887 static bool
888 find_bivs (struct ivopts_data *data)
890 tree phi, step, type, base;
891 bool found = false;
892 struct loop *loop = data->current_loop;
894 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
896 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
897 continue;
899 step = determine_biv_step (phi);
900 if (!step)
901 continue;
903 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
904 base = expand_simple_operations (base);
905 if (contains_abnormal_ssa_name_p (base)
906 || contains_abnormal_ssa_name_p (step))
907 continue;
909 type = TREE_TYPE (PHI_RESULT (phi));
910 base = fold_convert (type, base);
911 if (step)
912 step = fold_convert (type, step);
914 set_iv (data, PHI_RESULT (phi), base, step);
915 found = true;
918 return found;
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      /* The value flowing in over the latch edge is the incremented
         copy of the biv.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      /* Mark both the phi result and its incremented copy as bivs.  */
      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  Returns true on success.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  /* Only assignments whose lhs is an ssa name may define a giv.  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  /* Names occurring in abnormal phi nodes cannot be safely used.  */
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
983 /* Finds general ivs in statement STMT. */
985 static void
986 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
988 affine_iv iv;
990 if (!find_givs_in_stmt_scev (data, stmt, &iv))
991 return;
993 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
996 /* Finds general ivs in basic block BB. */
998 static void
999 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1001 block_stmt_iterator bsi;
1003 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1004 find_givs_in_stmt (data, bsi_stmt (bsi));
1007 /* Finds general ivs. */
1009 static void
1010 find_givs (struct ivopts_data *data)
1012 struct loop *loop = data->current_loop;
1013 basic_block *body = get_loop_body_in_dom_order (loop);
1014 unsigned i;
1016 for (i = 0; i < loop->num_nodes; i++)
1017 find_givs_in_bb (data, body[i]);
1018 free (body);
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  Returns false when no
   basic ivs exist (nothing to optimize then).  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   Returns the newly allocated use record.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  /* A name defined inside the loop is not an invariant.  */
  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  /* Assign an invariant id on first sight.  */
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.
   Returns the recorded use, or NULL when OP is not an interesting iv.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  /* If a nonlinear use was already recorded for this iv, reuse it.  */
  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  /* A zero-step "iv" is really a loop invariant.  */
  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record the use on a private copy of the iv description.  */
  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
1158 /* Given a condition *COND_P, checks whether it is a compare of an induction
1159 variable and an invariant. If this is the case, CONTROL_VAR is set
1160 to location of the iv, BOUND to the location of the invariant,
1161 IV_VAR and IV_BOUND are set to the corresponding induction variable
1162 descriptions, and true is returned. If this is not the case,
1163 CONTROL_VAR and BOUND are set to the arguments of the condition and
1164 false is returned. */
1166 static bool
1167 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1168 tree **control_var, tree **bound,
1169 struct iv **iv_var, struct iv **iv_bound)
1171 /* The nodes returned when COND has just one operand. Note that you should
1172 not modify anything in BOUND or IV_BOUND because of this. */
1173 static struct iv const_iv;
1174 static tree zero;
1175 tree cond = *cond_p;
1176 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1177 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1178 bool ret = false;
1180 zero = integer_zero_node;
1181 const_iv.step = integer_zero_node;
1183 if (TREE_CODE (cond) == SSA_NAME)
1185 op0 = cond_p;
1186 iv0 = get_iv (data, cond);
1187 ret = (iv0 && !integer_zerop (iv0->step));
1188 goto end;
1191 if (!COMPARISON_CLASS_P (cond))
1193 op0 = cond_p;
1194 goto end;
1197 op0 = &TREE_OPERAND (cond, 0);
1198 op1 = &TREE_OPERAND (cond, 1);
1199 if (TREE_CODE (*op0) == SSA_NAME)
1200 iv0 = get_iv (data, *op0);
1201 if (TREE_CODE (*op1) == SSA_NAME)
1202 iv1 = get_iv (data, *op1);
1204 /* Exactly one of the compared values must be an iv, and the other one must
1205 be an invariant. */
1206 if (!iv0 || !iv1)
1207 goto end;
1209 if (integer_zerop (iv0->step))
1211 /* Control variable may be on the other side. */
1212 tmp_op = op0; op0 = op1; op1 = tmp_op;
1213 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1215 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1217 end:
1218 if (control_var)
1219 *control_var = op0;;
1220 if (iv_var)
1221 *iv_var = iv0;;
1222 if (bound)
1223 *bound = op1;
1224 if (iv_bound)
1225 *iv_bound = iv1;
1227 return ret;
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231 and if so, records it. */
1233 static void
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1236 tree *var_p, *bound_p;
1237 struct iv *var_iv, *civ;
1239 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1241 find_interesting_uses_op (data, *var_p);
1242 find_interesting_uses_op (data, *bound_p);
1243 return;
1246 civ = XNEW (struct iv);
1247 *civ = *var_iv;
1248 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1251 /* Returns true if expression EXPR is obviously invariant in LOOP,
1252 i.e. if all its operands are defined outside of the LOOP. */
1254 bool
1255 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1257 basic_block def_bb;
1258 unsigned i, len;
1260 if (is_gimple_min_invariant (expr))
1261 return true;
1263 if (TREE_CODE (expr) == SSA_NAME)
1265 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1266 if (def_bb
1267 && flow_bb_inside_loop_p (loop, def_bb))
1268 return false;
1270 return true;
1273 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1274 return false;
1276 len = TREE_OPERAND_LENGTH (expr);
1277 for (i = 0; i < len; i++)
1278 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1279 return false;
1281 return true;
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  (This comment describes idx_find_step below;
   the structure holds the state passed to it.)  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* Pass-wide ivopts state.  */
  tree stmt;				/* Statement containing the memory
					   reference being analyzed.  */
  tree step;				/* Accumulated step (in sizetype) of
					   the whole address.  */
};
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* These reference kinds are never strength-reduced here.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-SSA index contributes no step; nothing more to do.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  /* Replace the index by its initial value; the step is accumulated
     separately into dta->step.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  /* Scale the iv step by the element size and add it to the step
     accumulated so far for the whole address.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
1372 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1373 object is passed to it in DATA. */
1375 static bool
1376 idx_record_use (tree base, tree *idx,
1377 void *data)
1379 find_interesting_uses_op (data, *idx);
1380 if (TREE_CODE (base) == ARRAY_REF)
1382 find_interesting_uses_op (data, array_ref_element_size (base));
1383 find_interesting_uses_op (data, array_ref_low_bound (base));
1385 return true;
1388 /* Returns true if memory reference REF may be unaligned. */
1390 static bool
1391 may_be_unaligned_p (tree ref)
1393 tree base;
1394 tree base_type;
1395 HOST_WIDE_INT bitsize;
1396 HOST_WIDE_INT bitpos;
1397 tree toffset;
1398 enum machine_mode mode;
1399 int unsignedp, volatilep;
1400 unsigned base_align;
1402 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1403 thus they are not misaligned. */
1404 if (TREE_CODE (ref) == TARGET_MEM_REF)
1405 return false;
1407 /* The test below is basically copy of what expr.c:normal_inner_ref
1408 does to check whether the object must be loaded by parts when
1409 STRICT_ALIGNMENT is true. */
1410 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1411 &unsignedp, &volatilep, true);
1412 base_type = TREE_TYPE (base);
1413 base_align = TYPE_ALIGN (base_type);
1415 if (mode != BLKmode
1416 && (base_align < GET_MODE_ALIGNMENT (mode)
1417 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1418 || bitpos % BITS_PER_UNIT != 0))
1419 return true;
1421 return false;
1424 /* Return true if EXPR may be non-addressable. */
1426 static bool
1427 may_be_nonaddressable_p (tree expr)
1429 switch (TREE_CODE (expr))
1431 case COMPONENT_REF:
1432 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1433 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1435 case ARRAY_REF:
1436 case ARRAY_RANGE_REF:
1437 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1439 case VIEW_CONVERT_EXPR:
1440 /* This kind of view-conversions may wrap non-addressable objects
1441 and make them look addressable. After some processing the
1442 non-addressability may be uncovered again, causing ADDR_EXPRs
1443 of inappropriate objects to be built. */
1444 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1445 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1447 default:
1448 break;
1451 return false;
/* Finds addresses in *OP_P inside STMT.  On success records an address-type
   use of the whole reference; on failure falls back to recording the plain
   uses of the individual indices.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace the TMR base and index SSA names by the initial values of
	 their ivs, accumulating the combined step.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      /* The index step is scaled by TMR_STEP, if any.  */
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      /* A zero step means the address is loop invariant; not interesting.  */
      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* Could not handle the address as a whole; record its parts as
     ordinary uses instead.  */
  for_each_index (op_p, idx_record_use, data);
}
1555 /* Finds and records invariants used in STMT. */
1557 static void
1558 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1560 ssa_op_iter iter;
1561 use_operand_p use_p;
1562 tree op;
1564 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1566 op = USE_FROM_PTR (use_p);
1567 record_invariant (data, op, false);
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      /* Dispatch on the kind of the right-hand side.  */
      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt,
				      &GIMPLE_STMT_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; the store side (if LHS is also a reference) is
	     interesting as well.  */
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt,
					   &GIMPLE_STMT_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      /* A store of a simple value.  */
      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  /* A PHI node in the loop header defining a biv is not an interesting
     use of its arguments.  */
  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
	return;
    }

  /* Otherwise, record nonlinear uses of all SSA operands that are ivs.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1666 /* Finds interesting uses of induction variables outside of loops
1667 on loop exit edge EXIT. */
1669 static void
1670 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1672 tree phi, def;
1674 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1676 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1677 if (is_gimple_reg (def))
1678 find_interesting_uses_op (data, def);
/* Finds uses of the induction variables that are interesting.  Walks all
   statements and PHIs of all blocks of the current loop, plus the PHIs on
   loop exit edges, and dumps the discovered invariants if requested.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Uses on loop exit edges live outside of the loop.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  Returns EXPR with the
   stripped offsets replaced by zero (or the original expression if nothing
   could be stripped).  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      /* The whole constant becomes the offset.  */
      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      /* Combine the offsets of the operands; unsigned arithmetic wraps,
	 matching the modulo semantics of the address computation.  */
      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* Offset contributed by the index, scaled by the element size.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      /* Descend into the address; from here on we are inside an address
	 and at its top level.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
1877 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1879 static tree
1880 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1882 return strip_offset_1 (expr, false, false, offset);
1885 /* Returns variant of TYPE that can be used as base for different uses.
1886 We return unsigned type with the same precision, which avoids problems
1887 with overflows. */
1889 static tree
1890 generic_type_for (tree type)
1892 if (POINTER_TYPE_P (type))
1893 return unsigned_type_for (type);
1895 if (TYPE_UNSIGNED (type))
1896 return type;
1898 return unsigned_type_for (type);
1901 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1902 the bitmap to that we should store it. */
1904 static struct ivopts_data *fd_ivopts_data;
1905 static tree
1906 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1908 bitmap *depends_on = data;
1909 struct version_info *info;
1911 if (TREE_CODE (*expr_p) != SSA_NAME)
1912 return NULL_TREE;
1913 info = name_info (fd_ivopts_data, *expr_p);
1915 if (!info->inv_id || info->has_nonlin_use)
1916 return NULL_TREE;
1918 if (!*depends_on)
1919 *depends_on = BITMAP_ALLOC (NULL);
1920 bitmap_set_bit (*depends_on, info->inv_id);
1922 return NULL_TREE;
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   Returns the (possibly preexisting) candidate.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* Canonicalize BASE and STEP to an unsigned type of the same precision,
     to avoid overflow issues.  */
  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  /* Check whether an equivalent candidate was already registered.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  /* Pseudocandidates (NULL iv) only match each other.  */
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  /* Not found; create a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* A non-constant step may depend on loop invariants; record them.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* An existing candidate may be promoted to important.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2025 /* Returns true if incrementing the induction variable at the end of the LOOP
2026 is allowed.
2028 The purpose is to avoid splitting latch edge with a biv increment, thus
2029 creating a jump, possibly confusing other optimization passes and leaving
2030 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2031 is not available (so we do not have a better alternative), or if the latch
2032 edge is already nonempty. */
2034 static bool
2035 allow_ip_end_pos_p (struct loop *loop)
2037 if (!ip_normal_pos (loop))
2038 return true;
2040 if (!empty_block_p (ip_end_pos (loop)))
2041 return true;
2043 return false;
2046 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2047 position to POS. If USE is not NULL, the candidate is set as related to
2048 it. The candidate computation is scheduled on all available positions. */
2050 static void
2051 add_candidate (struct ivopts_data *data,
2052 tree base, tree step, bool important, struct iv_use *use)
2054 if (ip_normal_pos (data->current_loop))
2055 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2056 if (ip_end_pos (data->current_loop)
2057 && allow_ip_end_pos_p (data->current_loop))
2058 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2061 /* Add a standard "0 + 1 * iteration" iv candidate for a
2062 type with SIZE bits. */
2064 static void
2065 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2066 unsigned int size)
2068 tree type = lang_hooks.types.type_for_size (size, true);
2069 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2070 true, NULL);
2073 /* Adds standard iv candidates. */
2075 static void
2076 add_standard_iv_candidates (struct ivopts_data *data)
2078 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2080 /* The same for a double-integer type if it is still fast enough. */
2081 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2082 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2086 /* Adds candidates bases on the old induction variable IV. */
2088 static void
2089 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2091 tree phi, def;
2092 struct iv_cand *cand;
2094 add_candidate (data, iv->base, iv->step, true, NULL);
2096 /* The same, but with initial value zero. */
2097 add_candidate (data,
2098 build_int_cst (TREE_TYPE (iv->base), 0),
2099 iv->step, true, NULL);
2101 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2102 if (TREE_CODE (phi) == PHI_NODE)
2104 /* Additionally record the possibility of leaving the original iv
2105 untouched. */
2106 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2107 cand = add_candidate_1 (data,
2108 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2109 SSA_NAME_DEF_STMT (def));
2110 cand->var_before = iv->ssa_name;
2111 cand->var_after = def;
2115 /* Adds candidates based on the old induction variables. */
2117 static void
2118 add_old_ivs_candidates (struct ivopts_data *data)
2120 unsigned i;
2121 struct iv *iv;
2122 bitmap_iterator bi;
2124 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2126 iv = ver_info (data, i)->iv;
2127 if (iv && iv->biv_p && !integer_zerop (iv->step))
2128 add_old_iv_candidates (data, iv);
2132 /* Adds candidates based on the value of the induction variable IV and USE. */
2134 static void
2135 add_iv_value_candidates (struct ivopts_data *data,
2136 struct iv *iv, struct iv_use *use)
2138 unsigned HOST_WIDE_INT offset;
2139 tree base;
2141 add_candidate (data, iv->base, iv->step, false, use);
2143 /* The same, but with initial value zero. Make such variable important,
2144 since it is generic enough so that possibly many uses may be based
2145 on it. */
2146 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2147 iv->step, true, use);
2149 /* Third, try removing the constant offset. */
2150 base = strip_offset (iv->base, &offset);
2151 if (offset)
2152 add_candidate (data, base, iv->step, false, use);
2155 /* Adds candidates based on the uses. */
2157 static void
2158 add_derived_ivs_candidates (struct ivopts_data *data)
2160 unsigned i;
2162 for (i = 0; i < n_iv_uses (data); i++)
2164 struct iv_use *use = iv_use (data, i);
2166 if (!use)
2167 continue;
2169 switch (use->type)
2171 case USE_NONLINEAR_EXPR:
2172 case USE_COMPARE:
2173 case USE_ADDRESS:
2174 /* Just add the ivs based on the value of the iv used here. */
2175 add_iv_value_candidates (data, use->iv, use);
2176 break;
2178 default:
2179 gcc_unreachable ();
2184 /* Record important candidates and add them to related_cands bitmaps
2185 if needed. */
2187 static void
2188 record_important_candidates (struct ivopts_data *data)
2190 unsigned i;
2191 struct iv_use *use;
2193 for (i = 0; i < n_iv_cands (data); i++)
2195 struct iv_cand *cand = iv_cand (data, i);
2197 if (cand->important)
2198 bitmap_set_bit (data->important_candidates, i);
2201 data->consider_all_candidates = (n_iv_cands (data)
2202 <= CONSIDER_ALL_CANDIDATES_BOUND);
2204 if (data->consider_all_candidates)
2206 /* We will not need "related_cands" bitmaps in this case,
2207 so release them to decrease peak memory consumption. */
2208 for (i = 0; i < n_iv_uses (data); i++)
2210 use = iv_use (data, i);
2211 BITMAP_FREE (use->related_cands);
2214 else
2216 /* Add important candidates to the related_cands bitmaps. */
2217 for (i = 0; i < n_iv_uses (data); i++)
2218 bitmap_ior_into (iv_use (data, i)->related_cands,
2219 data->important_candidates);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2241 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2242 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2243 we allocate a simple list to every use. */
2245 static void
2246 alloc_use_cost_map (struct ivopts_data *data)
2248 unsigned i, size, s, j;
2250 for (i = 0; i < n_iv_uses (data); i++)
2252 struct iv_use *use = iv_use (data, i);
2253 bitmap_iterator bi;
2255 if (data->consider_all_candidates)
2256 size = n_iv_cands (data);
2257 else
2259 s = 0;
2260 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2262 s++;
2265 /* Round up to the power of two, so that moduling by it is fast. */
2266 for (size = 1; size < s; size <<= 1)
2267 continue;
2270 use->n_map_members = size;
2271 use->cost_map = XCNEWVEC (struct cost_pair, size);
2275 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2276 on invariants DEPENDS_ON and that the value used in expressing it
2277 is VALUE.*/
2279 static void
2280 set_use_iv_cost (struct ivopts_data *data,
2281 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2282 bitmap depends_on, tree value)
2284 unsigned i, s;
2286 if (cost == INFTY)
2288 BITMAP_FREE (depends_on);
2289 return;
2292 if (data->consider_all_candidates)
2294 use->cost_map[cand->id].cand = cand;
2295 use->cost_map[cand->id].cost = cost;
2296 use->cost_map[cand->id].depends_on = depends_on;
2297 use->cost_map[cand->id].value = value;
2298 return;
2301 /* n_map_members is a power of two, so this computes modulo. */
2302 s = cand->id & (use->n_map_members - 1);
2303 for (i = s; i < use->n_map_members; i++)
2304 if (!use->cost_map[i].cand)
2305 goto found;
2306 for (i = 0; i < s; i++)
2307 if (!use->cost_map[i].cand)
2308 goto found;
2310 gcc_unreachable ();
2312 found:
2313 use->cost_map[i].cand = cand;
2314 use->cost_map[i].cost = cost;
2315 use->cost_map[i].depends_on = depends_on;
2316 use->cost_map[i].value = value;
2319 /* Gets cost of (USE, CANDIDATE) pair. */
2321 static struct cost_pair *
2322 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2323 struct iv_cand *cand)
2325 unsigned i, s;
2326 struct cost_pair *ret;
2328 if (!cand)
2329 return NULL;
2331 if (data->consider_all_candidates)
2333 ret = use->cost_map + cand->id;
2334 if (!ret->cand)
2335 return NULL;
2337 return ret;
2340 /* n_map_members is a power of two, so this computes modulo. */
2341 s = cand->id & (use->n_map_members - 1);
2342 for (i = s; i < use->n_map_members; i++)
2343 if (use->cost_map[i].cand == cand)
2344 return use->cost_map + i;
2346 for (i = 0; i < s; i++)
2347 if (use->cost_map[i].cand == cand)
2348 return use->cost_map + i;
2350 return NULL;
2353 /* Returns estimate on cost of computing SEQ. */
2355 static unsigned
2356 seq_cost (rtx seq)
2358 unsigned cost = 0;
2359 rtx set;
2361 for (; seq; seq = NEXT_INSN (seq))
2363 set = single_set (seq);
2364 if (set)
2365 cost += rtx_cost (set, SET);
2366 else
2367 cost++;
2370 return cost;
2373 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2374 static rtx
2375 produce_memory_decl_rtl (tree obj, int *regno)
2377 rtx x;
2379 gcc_assert (obj);
2380 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2382 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2383 x = gen_rtx_SYMBOL_REF (Pmode, name);
2384 SET_SYMBOL_REF_DECL (x, obj);
2385 x = gen_rtx_MEM (DECL_MODE (obj), x);
2386 targetm.encode_section_info (obj, x, true);
2388 else
2390 x = gen_raw_REG (Pmode, (*regno)++);
2391 x = gen_rtx_MEM (DECL_MODE (obj), x);
2394 return x;
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip handled components so we reach the base object whose
	 address is taken; that object must live in memory.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* Setting *ws to 0 tells walk_tree not to descend further.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects must live in memory; others get a fake reg.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake DECL_RTL can be reset later.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2455 /* Determines cost of the computation of EXPR. */
2457 static unsigned
2458 computation_cost (tree expr)
2460 rtx seq, rslt;
2461 tree type = TREE_TYPE (expr);
2462 unsigned cost;
2463 /* Avoid using hard regs in ways which may be unsupported. */
2464 int regno = LAST_VIRTUAL_REGISTER + 1;
2466 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2467 start_sequence ();
2468 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2469 seq = get_insns ();
2470 end_sequence ();
2472 cost = seq_cost (seq);
2473 if (MEM_P (rslt))
2474 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2476 return cost;
2479 /* Returns variable containing the value of candidate CAND at statement AT. */
2481 static tree
2482 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2484 if (stmt_after_increment (loop, cand, stmt))
2485 return cand->var_after;
2486 else
2487 return cand->var_before;
2490 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2491 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2494 tree_int_cst_sign_bit (tree t)
2496 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2497 unsigned HOST_WIDE_INT w;
2499 if (bitno < HOST_BITS_PER_WIDE_INT)
2500 w = TREE_INT_CST_LOW (t);
2501 else
2503 w = TREE_INT_CST_HIGH (t);
2504 bitno -= HOST_BITS_PER_WIDE_INT;
2507 return (w >> bitno) & 1;
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* TOP == BOT gives the multiple 1 directly.  */
  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* (X * c) is a multiple of BOT if X is; the result is scaled by c.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* A sum/difference is a multiple if both operands are; the factors
	 add/subtract.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      /* For constants, the factor exists iff BOT divides TOP exactly.  */
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
2575 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2576 same precision that is at least as wide as the precision of TYPE, stores
2577 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2578 type of A and B. */
2580 static tree
2581 determine_common_wider_type (tree *a, tree *b)
2583 tree wider_type = NULL;
2584 tree suba, subb;
2585 tree atype = TREE_TYPE (*a);
2587 if ((TREE_CODE (*a) == NOP_EXPR
2588 || TREE_CODE (*a) == CONVERT_EXPR))
2590 suba = TREE_OPERAND (*a, 0);
2591 wider_type = TREE_TYPE (suba);
2592 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2593 return atype;
2595 else
2596 return atype;
2598 if ((TREE_CODE (*b) == NOP_EXPR
2599 || TREE_CODE (*b) == CONVERT_EXPR))
2601 subb = TREE_OPERAND (*b, 0);
2602 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2603 return atype;
2605 else
2606 return atype;
2608 *a = suba;
2609 *b = subb;
2610 return wider_type;
2613 /* Determines the expression by that USE is expressed from induction variable
2614 CAND at statement AT in LOOP. The expression is stored in a decomposed
2615 form into AFF. Returns false if USE cannot be expressed using CAND. */
2617 static bool
2618 get_computation_aff (struct loop *loop,
2619 struct iv_use *use, struct iv_cand *cand, tree at,
2620 struct affine_tree_combination *aff)
2622 tree ubase = use->iv->base;
2623 tree ustep = use->iv->step;
2624 tree cbase = cand->iv->base;
2625 tree cstep = cand->iv->step, cstep_common;
2626 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2627 tree common_type, var;
2628 tree uutype;
2629 aff_tree cbase_aff, var_aff;
2630 double_int rat;
2632 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2634 /* We do not have a precision to express the values of use. */
2635 return false;
2638 var = var_at_stmt (loop, cand, at);
2639 uutype = unsigned_type_for (utype);
2641 /* If the conversion is not noop, perform it. */
2642 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2644 cstep = fold_convert (uutype, cstep);
2645 cbase = fold_convert (uutype, cbase);
2646 var = fold_convert (uutype, var);
2649 if (!constant_multiple_of (ustep, cstep, &rat))
2650 return false;
2652 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2653 type, we achieve better folding by computing their difference in this
2654 wider type, and cast the result to UUTYPE. We do not need to worry about
2655 overflows, as all the arithmetics will in the end be performed in UUTYPE
2656 anyway. */
2657 common_type = determine_common_wider_type (&ubase, &cbase);
2659 /* use = ubase - ratio * cbase + ratio * var. */
2660 tree_to_aff_combination (ubase, common_type, aff);
2661 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2662 tree_to_aff_combination (var, uutype, &var_aff);
2664 /* We need to shift the value if we are after the increment. */
2665 if (stmt_after_increment (loop, cand, at))
2667 aff_tree cstep_aff;
2669 if (common_type != uutype)
2670 cstep_common = fold_convert (common_type, cstep);
2671 else
2672 cstep_common = cstep;
2674 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2675 aff_combination_add (&cbase_aff, &cstep_aff);
2678 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2679 aff_combination_add (aff, &cbase_aff);
2680 if (common_type != uutype)
2681 aff_combination_convert (aff, uutype);
2683 aff_combination_scale (&var_aff, rat);
2684 aff_combination_add (aff, &var_aff);
2686 return true;
2689 /* Determines the expression by that USE is expressed from induction variable
2690 CAND at statement AT in LOOP. The computation is unshared. */
2692 static tree
2693 get_computation_at (struct loop *loop,
2694 struct iv_use *use, struct iv_cand *cand, tree at)
2696 aff_tree aff;
2697 tree type = TREE_TYPE (use->iv->base);
2699 if (!get_computation_aff (loop, use, cand, at, &aff))
2700 return NULL_TREE;
2701 unshare_aff_combination (&aff);
2702 return fold_convert (type, aff_combination_to_tree (&aff));
2705 /* Determines the expression by that USE is expressed from induction variable
2706 CAND in LOOP. The computation is unshared. */
2708 static tree
2709 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2711 return get_computation_at (loop, use, cand, use->stmt);
2714 /* Returns cost of addition in MODE. */
2716 static unsigned
2717 add_cost (enum machine_mode mode)
2719 static unsigned costs[NUM_MACHINE_MODES];
2720 rtx seq;
2721 unsigned cost;
2723 if (costs[mode])
2724 return costs[mode];
2726 start_sequence ();
2727 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2728 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2729 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2730 NULL_RTX);
2731 seq = get_insns ();
2732 end_sequence ();
2734 cost = seq_cost (seq);
2735 if (!cost)
2736 cost = 1;
2738 costs[mode] = cost;
2740 if (dump_file && (dump_flags & TDF_DETAILS))
2741 fprintf (dump_file, "Addition in %s costs %d\n",
2742 GET_MODE_NAME (mode), cost);
2743 return cost;
2746 /* Entry in a hashtable of already known costs for multiplication. */
2747 struct mbc_entry
2749 HOST_WIDE_INT cst; /* The constant to multiply by. */
2750 enum machine_mode mode; /* In mode. */
2751 unsigned cost; /* The cost. */
2754 /* Counts hash value for the ENTRY. */
2756 static hashval_t
2757 mbc_entry_hash (const void *entry)
2759 const struct mbc_entry *e = entry;
2761 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2764 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2766 static int
2767 mbc_entry_eq (const void *entry1, const void *entry2)
2769 const struct mbc_entry *e1 = entry1;
2770 const struct mbc_entry *e2 = entry2;
2772 return (e1->mode == e2->mode
2773 && e1->cst == e2->cst);
2776 /* Returns cost of multiplication by constant CST in MODE. */
2778 unsigned
2779 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2781 static htab_t costs;
2782 struct mbc_entry **cached, act;
2783 rtx seq;
2784 unsigned cost;
2786 if (!costs)
2787 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2789 act.mode = mode;
2790 act.cst = cst;
2791 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2792 if (*cached)
2793 return (*cached)->cost;
2795 *cached = XNEW (struct mbc_entry);
2796 (*cached)->mode = mode;
2797 (*cached)->cst = cst;
2799 start_sequence ();
2800 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2801 gen_int_mode (cst, mode), NULL_RTX, 0);
2802 seq = get_insns ();
2803 end_sequence ();
2805 cost = seq_cost (seq);
2807 if (dump_file && (dump_flags & TDF_DETAILS))
2808 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2809 (int) cst, GET_MODE_NAME (mode), cost);
2811 (*cached)->cost = cost;
2813 return cost;
2816 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2817 validity for a memory reference accessing memory of mode MODE. */
2819 bool
2820 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2822 #define MAX_RATIO 128
2823 static sbitmap valid_mult[MAX_MACHINE_MODE];
2825 if (!valid_mult[mode])
2827 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2828 rtx addr;
2829 HOST_WIDE_INT i;
2831 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2832 sbitmap_zero (valid_mult[mode]);
2833 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2834 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2836 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2837 if (memory_address_p (mode, addr))
2838 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2841 if (dump_file && (dump_flags & TDF_DETAILS))
2843 fprintf (dump_file, " allowed multipliers:");
2844 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2845 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2846 fprintf (dump_file, " %d", (int) i);
2847 fprintf (dump_file, "\n");
2848 fprintf (dump_file, "\n");
2852 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2853 return false;
2855 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2858 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2859 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2860 variable is omitted. Compute the cost for a memory reference that accesses
2861 a memory location of mode MEM_MODE.
2863 TODO -- there must be some better way. This all is quite crude. */
2865 static unsigned
2866 get_address_cost (bool symbol_present, bool var_present,
2867 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2868 enum machine_mode mem_mode)
2870 static bool initialized[MAX_MACHINE_MODE];
2871 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2872 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2873 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2874 unsigned cost, acost;
2875 bool offset_p, ratio_p;
2876 HOST_WIDE_INT s_offset;
2877 unsigned HOST_WIDE_INT mask;
2878 unsigned bits;
2880 if (!initialized[mem_mode])
2882 HOST_WIDE_INT i;
2883 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2884 int old_cse_not_expected;
2885 unsigned sym_p, var_p, off_p, rat_p, add_c;
2886 rtx seq, addr, base;
2887 rtx reg0, reg1;
2889 initialized[mem_mode] = true;
2891 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2893 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2894 for (i = start; i <= 1 << 20; i <<= 1)
2896 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2897 if (!memory_address_p (mem_mode, addr))
2898 break;
2900 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2901 off[mem_mode] = max_offset[mem_mode];
2903 for (i = start; i <= 1 << 20; i <<= 1)
2905 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2906 if (!memory_address_p (mem_mode, addr))
2907 break;
2909 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2911 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, "get_address_cost:\n");
2914 fprintf (dump_file, " min offset %s %d\n",
2915 GET_MODE_NAME (mem_mode),
2916 (int) min_offset[mem_mode]);
2917 fprintf (dump_file, " max offset %s %d\n",
2918 GET_MODE_NAME (mem_mode),
2919 (int) max_offset[mem_mode]);
2922 rat[mem_mode] = 1;
2923 for (i = 2; i <= MAX_RATIO; i++)
2924 if (multiplier_allowed_in_address_p (i, mem_mode))
2926 rat[mem_mode] = i;
2927 break;
2930 /* Compute the cost of various addressing modes. */
2931 acost = 0;
2932 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2933 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2935 for (i = 0; i < 16; i++)
2937 sym_p = i & 1;
2938 var_p = (i >> 1) & 1;
2939 off_p = (i >> 2) & 1;
2940 rat_p = (i >> 3) & 1;
2942 addr = reg0;
2943 if (rat_p)
2944 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2945 gen_int_mode (rat[mem_mode], Pmode));
2947 if (var_p)
2948 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2950 if (sym_p)
2952 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2953 /* ??? We can run into trouble with some backends by presenting
2954 it with symbols which havn't been properly passed through
2955 targetm.encode_section_info. By setting the local bit, we
2956 enhance the probability of things working. */
2957 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2959 if (off_p)
2960 base = gen_rtx_fmt_e (CONST, Pmode,
2961 gen_rtx_fmt_ee (PLUS, Pmode,
2962 base,
2963 gen_int_mode (off[mem_mode],
2964 Pmode)));
2966 else if (off_p)
2967 base = gen_int_mode (off[mem_mode], Pmode);
2968 else
2969 base = NULL_RTX;
2971 if (base)
2972 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2974 start_sequence ();
2975 /* To avoid splitting addressing modes, pretend that no cse will
2976 follow. */
2977 old_cse_not_expected = cse_not_expected;
2978 cse_not_expected = true;
2979 addr = memory_address (mem_mode, addr);
2980 cse_not_expected = old_cse_not_expected;
2981 seq = get_insns ();
2982 end_sequence ();
2984 acost = seq_cost (seq);
2985 acost += address_cost (addr, mem_mode);
2987 if (!acost)
2988 acost = 1;
2989 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2992 /* On some targets, it is quite expensive to load symbol to a register,
2993 which makes addresses that contain symbols look much more expensive.
2994 However, the symbol will have to be loaded in any case before the
2995 loop (and quite likely we have it in register already), so it does not
2996 make much sense to penalize them too heavily. So make some final
2997 tweaks for the SYMBOL_PRESENT modes:
2999 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3000 var is cheaper, use this mode with small penalty.
3001 If VAR_PRESENT is true, try whether the mode with
3002 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3003 if this is the case, use it. */
3004 add_c = add_cost (Pmode);
3005 for (i = 0; i < 8; i++)
3007 var_p = i & 1;
3008 off_p = (i >> 1) & 1;
3009 rat_p = (i >> 2) & 1;
3011 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3012 if (var_p)
3013 acost += add_c;
3015 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3016 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3019 if (dump_file && (dump_flags & TDF_DETAILS))
3021 fprintf (dump_file, "Address costs:\n");
3023 for (i = 0; i < 16; i++)
3025 sym_p = i & 1;
3026 var_p = (i >> 1) & 1;
3027 off_p = (i >> 2) & 1;
3028 rat_p = (i >> 3) & 1;
3030 fprintf (dump_file, " ");
3031 if (sym_p)
3032 fprintf (dump_file, "sym + ");
3033 if (var_p)
3034 fprintf (dump_file, "var + ");
3035 if (off_p)
3036 fprintf (dump_file, "cst + ");
3037 if (rat_p)
3038 fprintf (dump_file, "rat * ");
3040 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3041 fprintf (dump_file, "index costs %d\n", acost);
3043 fprintf (dump_file, "\n");
3047 bits = GET_MODE_BITSIZE (Pmode);
3048 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3049 offset &= mask;
3050 if ((offset >> (bits - 1) & 1))
3051 offset |= ~mask;
3052 s_offset = offset;
3054 cost = 0;
3055 offset_p = (s_offset != 0
3056 && min_offset[mem_mode] <= s_offset
3057 && s_offset <= max_offset[mem_mode]);
3058 ratio_p = (ratio != 1
3059 && multiplier_allowed_in_address_p (ratio, mem_mode));
3061 if (ratio != 1 && !ratio_p)
3062 cost += multiply_by_cost (ratio, Pmode);
3064 if (s_offset && !offset_p && !symbol_present)
3065 cost += add_cost (Pmode);
3067 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3068 return cost + acost;
3071 /* Estimates cost of forcing expression EXPR into a variable. */
3073 unsigned
3074 force_expr_to_var_cost (tree expr)
3076 static bool costs_initialized = false;
3077 static unsigned integer_cost;
3078 static unsigned symbol_cost;
3079 static unsigned address_cost;
3080 tree op0, op1;
3081 unsigned cost0, cost1, cost;
3082 enum machine_mode mode;
3084 if (!costs_initialized)
3086 tree type = build_pointer_type (integer_type_node);
3087 tree var, addr;
3088 rtx x;
3090 var = create_tmp_var_raw (integer_type_node, "test_var");
3091 TREE_STATIC (var) = 1;
3092 x = produce_memory_decl_rtl (var, NULL);
3093 SET_DECL_RTL (var, x);
3095 integer_cost = computation_cost (build_int_cst (integer_type_node,
3096 2000));
3098 addr = build1 (ADDR_EXPR, type, var);
3099 symbol_cost = computation_cost (addr) + 1;
3101 address_cost
3102 = computation_cost (build2 (PLUS_EXPR, type,
3103 addr,
3104 build_int_cst (type, 2000))) + 1;
3105 if (dump_file && (dump_flags & TDF_DETAILS))
3107 fprintf (dump_file, "force_expr_to_var_cost:\n");
3108 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3109 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3110 fprintf (dump_file, " address %d\n", (int) address_cost);
3111 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3112 fprintf (dump_file, "\n");
3115 costs_initialized = true;
3118 STRIP_NOPS (expr);
3120 if (SSA_VAR_P (expr))
3121 return 0;
3123 if (TREE_INVARIANT (expr))
3125 if (TREE_CODE (expr) == INTEGER_CST)
3126 return integer_cost;
3128 if (TREE_CODE (expr) == ADDR_EXPR)
3130 tree obj = TREE_OPERAND (expr, 0);
3132 if (TREE_CODE (obj) == VAR_DECL
3133 || TREE_CODE (obj) == PARM_DECL
3134 || TREE_CODE (obj) == RESULT_DECL)
3135 return symbol_cost;
3138 return address_cost;
3141 switch (TREE_CODE (expr))
3143 case PLUS_EXPR:
3144 case MINUS_EXPR:
3145 case MULT_EXPR:
3146 op0 = TREE_OPERAND (expr, 0);
3147 op1 = TREE_OPERAND (expr, 1);
3148 STRIP_NOPS (op0);
3149 STRIP_NOPS (op1);
3151 if (is_gimple_val (op0))
3152 cost0 = 0;
3153 else
3154 cost0 = force_expr_to_var_cost (op0);
3156 if (is_gimple_val (op1))
3157 cost1 = 0;
3158 else
3159 cost1 = force_expr_to_var_cost (op1);
3161 break;
3163 default:
3164 /* Just an arbitrary value, FIXME. */
3165 return target_spill_cost;
3168 mode = TYPE_MODE (TREE_TYPE (expr));
3169 switch (TREE_CODE (expr))
3171 case PLUS_EXPR:
3172 case MINUS_EXPR:
3173 cost = add_cost (mode);
3174 break;
3176 case MULT_EXPR:
3177 if (cst_and_fits_in_hwi (op0))
3178 cost = multiply_by_cost (int_cst_value (op0), mode);
3179 else if (cst_and_fits_in_hwi (op1))
3180 cost = multiply_by_cost (int_cst_value (op1), mode);
3181 else
3182 return target_spill_cost;
3183 break;
3185 default:
3186 gcc_unreachable ();
3189 cost += cost0;
3190 cost += cost1;
3192 /* Bound the cost by target_spill_cost. The parts of complicated
3193 computations often are either loop invariant or at least can
3194 be shared between several iv uses, so letting this grow without
3195 limits would not give reasonable results. */
3196 return cost < target_spill_cost ? cost : target_spill_cost;
3199 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3200 invariants the computation depends on. */
3202 static unsigned
3203 force_var_cost (struct ivopts_data *data,
3204 tree expr, bitmap *depends_on)
3206 if (depends_on)
3208 fd_ivopts_data = data;
3209 walk_tree (&expr, find_depends, depends_on, NULL);
3212 return force_expr_to_var_cost (expr);
3215 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3216 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3217 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3218 invariants the computation depends on. */
3220 static unsigned
3221 split_address_cost (struct ivopts_data *data,
3222 tree addr, bool *symbol_present, bool *var_present,
3223 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3225 tree core;
3226 HOST_WIDE_INT bitsize;
3227 HOST_WIDE_INT bitpos;
3228 tree toffset;
3229 enum machine_mode mode;
3230 int unsignedp, volatilep;
3232 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3233 &unsignedp, &volatilep, false);
3235 if (toffset != 0
3236 || bitpos % BITS_PER_UNIT != 0
3237 || TREE_CODE (core) != VAR_DECL)
3239 *symbol_present = false;
3240 *var_present = true;
3241 fd_ivopts_data = data;
3242 walk_tree (&addr, find_depends, depends_on, NULL);
3243 return target_spill_cost;
3246 *offset += bitpos / BITS_PER_UNIT;
3247 if (TREE_STATIC (core)
3248 || DECL_EXTERNAL (core))
3250 *symbol_present = true;
3251 *var_present = false;
3252 return 0;
3255 *symbol_present = false;
3256 *var_present = true;
3257 return 0;
3260 /* Estimates cost of expressing difference of addresses E1 - E2 as
3261 var + symbol + offset. The value of offset is added to OFFSET,
3262 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3263 part is missing. DEPENDS_ON is a set of the invariants the computation
3264 depends on. */
3266 static unsigned
3267 ptr_difference_cost (struct ivopts_data *data,
3268 tree e1, tree e2, bool *symbol_present, bool *var_present,
3269 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3271 HOST_WIDE_INT diff = 0;
3272 unsigned cost;
3274 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3276 if (ptr_difference_const (e1, e2, &diff))
3278 *offset += diff;
3279 *symbol_present = false;
3280 *var_present = false;
3281 return 0;
3284 if (e2 == integer_zero_node)
3285 return split_address_cost (data, TREE_OPERAND (e1, 0),
3286 symbol_present, var_present, offset, depends_on);
3288 *symbol_present = false;
3289 *var_present = true;
3291 cost = force_var_cost (data, e1, depends_on);
3292 cost += force_var_cost (data, e2, depends_on);
3293 cost += add_cost (Pmode);
3295 return cost;
3298 /* Estimates cost of expressing difference E1 - E2 as
3299 var + symbol + offset. The value of offset is added to OFFSET,
3300 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3301 part is missing. DEPENDS_ON is a set of the invariants the computation
3302 depends on. */
3304 static unsigned
3305 difference_cost (struct ivopts_data *data,
3306 tree e1, tree e2, bool *symbol_present, bool *var_present,
3307 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3309 unsigned cost;
3310 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3311 unsigned HOST_WIDE_INT off1, off2;
3313 e1 = strip_offset (e1, &off1);
3314 e2 = strip_offset (e2, &off2);
3315 *offset += off1 - off2;
3317 STRIP_NOPS (e1);
3318 STRIP_NOPS (e2);
3320 if (TREE_CODE (e1) == ADDR_EXPR)
3321 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3322 depends_on);
3323 *symbol_present = false;
3325 if (operand_equal_p (e1, e2, 0))
3327 *var_present = false;
3328 return 0;
3330 *var_present = true;
3331 if (integer_zerop (e2))
3332 return force_var_cost (data, e1, depends_on);
3334 if (integer_zerop (e1))
3336 cost = force_var_cost (data, e2, depends_on);
3337 cost += multiply_by_cost (-1, mode);
3339 return cost;
3342 cost = force_var_cost (data, e1, depends_on);
3343 cost += force_var_cost (data, e2, depends_on);
3344 cost += add_cost (mode);
3346 return cost;
3349 /* Determines the cost of the computation by that USE is expressed
3350 from induction variable CAND. If ADDRESS_P is true, we just need
3351 to create an address from it, otherwise we want to get it into
3352 register. A set of invariants we depend on is stored in
3353 DEPENDS_ON. AT is the statement at that the value is computed. */
3355 static unsigned
3356 get_computation_cost_at (struct ivopts_data *data,
3357 struct iv_use *use, struct iv_cand *cand,
3358 bool address_p, bitmap *depends_on, tree at)
3360 tree ubase = use->iv->base, ustep = use->iv->step;
3361 tree cbase, cstep;
3362 tree utype = TREE_TYPE (ubase), ctype;
3363 unsigned HOST_WIDE_INT cstepi, offset = 0;
3364 HOST_WIDE_INT ratio, aratio;
3365 bool var_present, symbol_present;
3366 unsigned cost = 0, n_sums;
3367 double_int rat;
3369 *depends_on = NULL;
3371 /* Only consider real candidates. */
3372 if (!cand->iv)
3373 return INFTY;
3375 cbase = cand->iv->base;
3376 cstep = cand->iv->step;
3377 ctype = TREE_TYPE (cbase);
3379 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3381 /* We do not have a precision to express the values of use. */
3382 return INFTY;
3385 if (address_p)
3387 /* Do not try to express address of an object with computation based
3388 on address of a different object. This may cause problems in rtl
3389 level alias analysis (that does not expect this to be happening,
3390 as this is illegal in C), and would be unlikely to be useful
3391 anyway. */
3392 if (use->iv->base_object
3393 && cand->iv->base_object
3394 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3395 return INFTY;
3398 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3400 /* TODO -- add direct handling of this case. */
3401 goto fallback;
3404 /* CSTEPI is removed from the offset in case statement is after the
3405 increment. If the step is not constant, we use zero instead.
3406 This is a bit imprecise (there is the extra addition), but
3407 redundancy elimination is likely to transform the code so that
3408 it uses value of the variable before increment anyway,
3409 so it is not that much unrealistic. */
3410 if (cst_and_fits_in_hwi (cstep))
3411 cstepi = int_cst_value (cstep);
3412 else
3413 cstepi = 0;
3415 if (!constant_multiple_of (ustep, cstep, &rat))
3416 return INFTY;
3418 if (double_int_fits_in_shwi_p (rat))
3419 ratio = double_int_to_shwi (rat);
3420 else
3421 return INFTY;
3423 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3424 or ratio == 1, it is better to handle this like
3426 ubase - ratio * cbase + ratio * var
3428 (also holds in the case ratio == -1, TODO. */
3430 if (cst_and_fits_in_hwi (cbase))
3432 offset = - ratio * int_cst_value (cbase);
3433 cost += difference_cost (data,
3434 ubase, integer_zero_node,
3435 &symbol_present, &var_present, &offset,
3436 depends_on);
3438 else if (ratio == 1)
3440 cost += difference_cost (data,
3441 ubase, cbase,
3442 &symbol_present, &var_present, &offset,
3443 depends_on);
3445 else
3447 cost += force_var_cost (data, cbase, depends_on);
3448 cost += add_cost (TYPE_MODE (ctype));
3449 cost += difference_cost (data,
3450 ubase, integer_zero_node,
3451 &symbol_present, &var_present, &offset,
3452 depends_on);
3455 /* If we are after the increment, the value of the candidate is higher by
3456 one iteration. */
3457 if (stmt_after_increment (data->current_loop, cand, at))
3458 offset -= ratio * cstepi;
3460 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3461 (symbol/var/const parts may be omitted). If we are looking for an address,
3462 find the cost of addressing this. */
3463 if (address_p)
3464 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3465 TYPE_MODE (TREE_TYPE (*use->op_p)));
3467 /* Otherwise estimate the costs for computing the expression. */
3468 aratio = ratio > 0 ? ratio : -ratio;
3469 if (!symbol_present && !var_present && !offset)
3471 if (ratio != 1)
3472 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3474 return cost;
3477 if (aratio != 1)
3478 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3480 n_sums = 1;
3481 if (var_present
3482 /* Symbol + offset should be compile-time computable. */
3483 && (symbol_present || offset))
3484 n_sums++;
3486 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3488 fallback:
3490 /* Just get the expression, expand it and measure the cost. */
3491 tree comp = get_computation_at (data->current_loop, use, cand, at);
3493 if (!comp)
3494 return INFTY;
3496 if (address_p)
3497 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3499 return computation_cost (comp);
3503 /* Determines the cost of the computation by that USE is expressed
3504 from induction variable CAND. If ADDRESS_P is true, we just need
3505 to create an address from it, otherwise we want to get it into
3506 register. A set of invariants we depend on is stored in
3507 DEPENDS_ON. */
3509 static unsigned
3510 get_computation_cost (struct ivopts_data *data,
3511 struct iv_use *use, struct iv_cand *cand,
3512 bool address_p, bitmap *depends_on)
3514 return get_computation_cost_at (data,
3515 use, cand, address_p, depends_on, use->stmt);
3518 /* Determines cost of basing replacement of USE on CAND in a generic
3519 expression. */
3521 static bool
3522 determine_use_iv_cost_generic (struct ivopts_data *data,
3523 struct iv_use *use, struct iv_cand *cand)
3525 bitmap depends_on;
3526 unsigned cost;
3528 /* The simple case first -- if we need to express value of the preserved
3529 original biv, the cost is 0. This also prevents us from counting the
3530 cost of increment twice -- once at this use and once in the cost of
3531 the candidate. */
3532 if (cand->pos == IP_ORIGINAL
3533 && cand->incremented_at == use->stmt)
3535 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3536 return true;
3539 cost = get_computation_cost (data, use, cand, false, &depends_on);
3540 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3542 return cost != INFTY;
3545 /* Determines cost of basing replacement of USE on CAND in an address. */
3547 static bool
3548 determine_use_iv_cost_address (struct ivopts_data *data,
3549 struct iv_use *use, struct iv_cand *cand)
3551 bitmap depends_on;
3552 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3554 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3556 return cost != INFTY;
3559 /* Computes value of candidate CAND at position AT in iteration NITER, and
3560 stores it to VAL. */
3562 static void
3563 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3564 aff_tree *val)
3566 aff_tree step, delta, nit;
3567 struct iv *iv = cand->iv;
3568 tree type = TREE_TYPE (iv->base);
3570 tree_to_aff_combination (iv->step, type, &step);
3571 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3572 aff_combination_convert (&nit, type);
3573 aff_combination_mult (&nit, &step, &delta);
3574 if (stmt_after_increment (loop, cand, at))
3575 aff_combination_add (&delta, &step);
3577 tree_to_aff_combination (iv->base, type, val);
3578 aff_combination_add (val, &delta);
3581 /* Returns period of induction variable iv. */
3583 static tree
3584 iv_period (struct iv *iv)
3586 tree step = iv->step, period, type;
3587 tree pow2div;
3589 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3591 /* Period of the iv is gcd (step, type range). Since type range is power
3592 of two, it suffices to determine the maximum power of two that divides
3593 step. */
3594 pow2div = num_ending_zeros (step);
3595 type = unsigned_type_for (TREE_TYPE (step));
3597 period = build_low_bits_mask (type,
3598 (TYPE_PRECISION (type)
3599 - tree_low_cst (pow2div, 1)));
3601 return period;
3604 /* Returns the comparison operator used when eliminating the iv USE. */
3606 static enum tree_code
3607 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3609 struct loop *loop = data->current_loop;
3610 basic_block ex_bb;
3611 edge exit;
3613 ex_bb = bb_for_stmt (use->stmt);
3614 exit = EDGE_SUCC (ex_bb, 0);
3615 if (flow_bb_inside_loop_p (loop, exit->dest))
3616 exit = EDGE_SUCC (ex_bb, 1);
3618 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3621 /* Check whether it is possible to express the condition in USE by comparison
3622 of candidate CAND. If so, store the value compared with to BOUND. */
3624 static bool
3625 may_eliminate_iv (struct ivopts_data *data,
3626 struct iv_use *use, struct iv_cand *cand, tree *bound)
3628 basic_block ex_bb;
3629 edge exit;
3630 tree nit, nit_type;
3631 tree wider_type, period, per_type;
3632 struct loop *loop = data->current_loop;
3633 aff_tree bnd;
3635 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3636 return false;
3638 /* For now works only for exits that dominate the loop latch. TODO -- extend
3639 for other conditions inside loop body. */
3640 ex_bb = bb_for_stmt (use->stmt);
3641 if (use->stmt != last_stmt (ex_bb)
3642 || TREE_CODE (use->stmt) != COND_EXPR)
3643 return false;
3644 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3645 return false;
3647 exit = EDGE_SUCC (ex_bb, 0);
3648 if (flow_bb_inside_loop_p (loop, exit->dest))
3649 exit = EDGE_SUCC (ex_bb, 1);
3650 if (flow_bb_inside_loop_p (loop, exit->dest))
3651 return false;
3653 nit = niter_for_exit (data, exit);
3654 if (!nit)
3655 return false;
3657 nit_type = TREE_TYPE (nit);
3659 /* Determine whether we may use the variable to test whether niter iterations
3660 elapsed. This is the case iff the period of the induction variable is
3661 greater than the number of iterations. */
3662 period = iv_period (cand->iv);
3663 if (!period)
3664 return false;
3665 per_type = TREE_TYPE (period);
3667 wider_type = TREE_TYPE (period);
3668 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3669 wider_type = per_type;
3670 else
3671 wider_type = nit_type;
3673 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3674 fold_convert (wider_type, period),
3675 fold_convert (wider_type, nit))))
3676 return false;
3678 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3679 *bound = aff_combination_to_tree (&bnd);
3680 return true;
3683 /* Determines cost of basing replacement of USE on CAND in a condition. */
3685 static bool
3686 determine_use_iv_cost_condition (struct ivopts_data *data,
3687 struct iv_use *use, struct iv_cand *cand)
3689 tree bound = NULL_TREE;
3690 struct iv *cmp_iv;
3691 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3692 unsigned elim_cost, express_cost, cost;
3693 bool ok;
3695 /* Only consider real candidates. */
3696 if (!cand->iv)
3698 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3699 return false;
3702 /* Try iv elimination. */
3703 if (may_eliminate_iv (data, use, cand, &bound))
3704 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3705 else
3706 elim_cost = INFTY;
3708 /* Try expressing the original giv. If it is compared with an invariant,
3709 note that we cannot get rid of it. */
3710 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3711 gcc_assert (ok);
3713 express_cost = get_computation_cost (data, use, cand, false,
3714 &depends_on_express);
3715 fd_ivopts_data = data;
3716 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3718 /* Choose the better approach. */
3719 if (elim_cost < express_cost)
3721 cost = elim_cost;
3722 depends_on = depends_on_elim;
3723 depends_on_elim = NULL;
3725 else
3727 cost = express_cost;
3728 depends_on = depends_on_express;
3729 depends_on_express = NULL;
3730 bound = NULL_TREE;
3733 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3735 if (depends_on_elim)
3736 BITMAP_FREE (depends_on_elim);
3737 if (depends_on_express)
3738 BITMAP_FREE (depends_on_express);
3740 return cost != INFTY;
3743 /* Determines cost of basing replacement of USE on CAND. Returns false
3744 if USE cannot be based on CAND. */
3746 static bool
3747 determine_use_iv_cost (struct ivopts_data *data,
3748 struct iv_use *use, struct iv_cand *cand)
3750 switch (use->type)
3752 case USE_NONLINEAR_EXPR:
3753 return determine_use_iv_cost_generic (data, use, cand);
3755 case USE_ADDRESS:
3756 return determine_use_iv_cost_address (data, use, cand);
3758 case USE_COMPARE:
3759 return determine_use_iv_cost_condition (data, use, cand);
3761 default:
3762 gcc_unreachable ();
3766 /* Determines costs of basing the use of the iv on an iv candidate. */
3768 static void
3769 determine_use_iv_costs (struct ivopts_data *data)
3771 unsigned i, j;
3772 struct iv_use *use;
3773 struct iv_cand *cand;
3774 bitmap to_clear = BITMAP_ALLOC (NULL);
3776 alloc_use_cost_map (data);
3778 for (i = 0; i < n_iv_uses (data); i++)
3780 use = iv_use (data, i);
3782 if (data->consider_all_candidates)
3784 for (j = 0; j < n_iv_cands (data); j++)
3786 cand = iv_cand (data, j);
3787 determine_use_iv_cost (data, use, cand);
3790 else
3792 bitmap_iterator bi;
3794 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3796 cand = iv_cand (data, j);
3797 if (!determine_use_iv_cost (data, use, cand))
3798 bitmap_set_bit (to_clear, j);
3801 /* Remove the candidates for that the cost is infinite from
3802 the list of related candidates. */
3803 bitmap_and_compl_into (use->related_cands, to_clear);
3804 bitmap_clear (to_clear);
3808 BITMAP_FREE (to_clear);
3810 if (dump_file && (dump_flags & TDF_DETAILS))
3812 fprintf (dump_file, "Use-candidate costs:\n");
3814 for (i = 0; i < n_iv_uses (data); i++)
3816 use = iv_use (data, i);
3818 fprintf (dump_file, "Use %d:\n", i);
3819 fprintf (dump_file, " cand\tcost\tdepends on\n");
3820 for (j = 0; j < use->n_map_members; j++)
3822 if (!use->cost_map[j].cand
3823 || use->cost_map[j].cost == INFTY)
3824 continue;
3826 fprintf (dump_file, " %d\t%d\t",
3827 use->cost_map[j].cand->id,
3828 use->cost_map[j].cost);
3829 if (use->cost_map[j].depends_on)
3830 bitmap_print (dump_file,
3831 use->cost_map[j].depends_on, "","");
3832 fprintf (dump_file, "\n");
3835 fprintf (dump_file, "\n");
3837 fprintf (dump_file, "\n");
3841 /* Determines cost of the candidate CAND. */
3843 static void
3844 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3846 unsigned cost_base, cost_step;
3847 tree base;
3849 if (!cand->iv)
3851 cand->cost = 0;
3852 return;
3855 /* There are two costs associated with the candidate -- its increment
3856 and its initialization. The second is almost negligible for any loop
3857 that rolls enough, so we take it just very little into account. */
3859 base = cand->iv->base;
3860 cost_base = force_var_cost (data, base, NULL);
3861 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3863 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3865 /* Prefer the original iv unless we may gain something by replacing it;
3866 this is not really relevant for artificial ivs created by other
3867 passes. */
3868 if (cand->pos == IP_ORIGINAL
3869 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3870 cand->cost--;
3872 /* Prefer not to insert statements into latch unless there are some
3873 already (so that we do not create unnecessary jumps). */
3874 if (cand->pos == IP_END
3875 && empty_block_p (ip_end_pos (data->current_loop)))
3876 cand->cost++;
3879 /* Determines costs of computation of the candidates. */
3881 static void
3882 determine_iv_costs (struct ivopts_data *data)
3884 unsigned i;
3886 if (dump_file && (dump_flags & TDF_DETAILS))
3888 fprintf (dump_file, "Candidate costs:\n");
3889 fprintf (dump_file, " cand\tcost\n");
3892 for (i = 0; i < n_iv_cands (data); i++)
3894 struct iv_cand *cand = iv_cand (data, i);
3896 determine_iv_cost (data, cand);
3898 if (dump_file && (dump_flags & TDF_DETAILS))
3899 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3903 fprintf (dump_file, "\n");
3906 /* Calculates cost for having SIZE induction variables. */
3908 static unsigned
3909 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3911 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3914 /* For each size of the induction variable set determine the penalty. */
3916 static void
3917 determine_set_costs (struct ivopts_data *data)
3919 unsigned j, n;
3920 tree phi, op;
3921 struct loop *loop = data->current_loop;
3922 bitmap_iterator bi;
3924 /* We use the following model (definitely improvable, especially the
3925 cost function -- TODO):
3927 We estimate the number of registers available (using MD data), name it A.
3929 We estimate the number of registers used by the loop, name it U. This
3930 number is obtained as the number of loop phi nodes (not counting virtual
3931 registers and bivs) + the number of variables from outside of the loop.
3933 We set a reserve R (free regs that are used for temporary computations,
3934 etc.). For now the reserve is a constant 3.
3936 Let I be the number of induction variables.
3938 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3939 make a lot of ivs without a reason).
3940 -- if A - R < U + I <= A, the cost is I * PRES_COST
3941 -- if U + I > A, the cost is I * PRES_COST and
3942 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3944 if (dump_file && (dump_flags & TDF_DETAILS))
3946 fprintf (dump_file, "Global costs:\n");
3947 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3948 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3949 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3950 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3953 n = 0;
3954 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3956 op = PHI_RESULT (phi);
3958 if (!is_gimple_reg (op))
3959 continue;
3961 if (get_iv (data, op))
3962 continue;
3964 n++;
3967 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3969 struct version_info *info = ver_info (data, j);
3971 if (info->inv_id && info->has_nonlin_use)
3972 n++;
3975 data->regs_used = n;
3976 if (dump_file && (dump_flags & TDF_DETAILS))
3977 fprintf (dump_file, " regs_used %d\n", n);
3979 if (dump_file && (dump_flags & TDF_DETAILS))
3981 fprintf (dump_file, " cost for size:\n");
3982 fprintf (dump_file, " ivs\tcost\n");
3983 for (j = 0; j <= 2 * target_avail_regs; j++)
3984 fprintf (dump_file, " %d\t%d\n", j,
3985 ivopts_global_cost_for_size (data, j));
3986 fprintf (dump_file, "\n");
3990 /* Returns true if A is a cheaper cost pair than B. */
3992 static bool
3993 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3995 if (!a)
3996 return false;
3998 if (!b)
3999 return true;
4001 if (a->cost < b->cost)
4002 return true;
4004 if (a->cost > b->cost)
4005 return false;
4007 /* In case the costs are the same, prefer the cheaper candidate. */
4008 if (a->cand->cost < b->cand->cost)
4009 return true;
4011 return false;
4014 /* Computes the cost field of IVS structure. */
4016 static void
4017 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4019 unsigned cost = 0;
4021 cost += ivs->cand_use_cost;
4022 cost += ivs->cand_cost;
4023 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4025 ivs->cost = cost;
4028 /* Remove invariants in set INVS to set IVS. */
4030 static void
4031 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4033 bitmap_iterator bi;
4034 unsigned iid;
4036 if (!invs)
4037 return;
4039 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4041 ivs->n_invariant_uses[iid]--;
4042 if (ivs->n_invariant_uses[iid] == 0)
4043 ivs->n_regs--;
4047 /* Set USE not to be expressed by any candidate in IVS. */
4049 static void
4050 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4051 struct iv_use *use)
4053 unsigned uid = use->id, cid;
4054 struct cost_pair *cp;
4056 cp = ivs->cand_for_use[uid];
4057 if (!cp)
4058 return;
4059 cid = cp->cand->id;
4061 ivs->bad_uses++;
4062 ivs->cand_for_use[uid] = NULL;
4063 ivs->n_cand_uses[cid]--;
4065 if (ivs->n_cand_uses[cid] == 0)
4067 bitmap_clear_bit (ivs->cands, cid);
4068 /* Do not count the pseudocandidates. */
4069 if (cp->cand->iv)
4070 ivs->n_regs--;
4071 ivs->n_cands--;
4072 ivs->cand_cost -= cp->cand->cost;
4074 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4077 ivs->cand_use_cost -= cp->cost;
4079 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4080 iv_ca_recount_cost (data, ivs);
4083 /* Add invariants in set INVS to set IVS. */
4085 static void
4086 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4088 bitmap_iterator bi;
4089 unsigned iid;
4091 if (!invs)
4092 return;
4094 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4096 ivs->n_invariant_uses[iid]++;
4097 if (ivs->n_invariant_uses[iid] == 1)
4098 ivs->n_regs++;
4102 /* Set cost pair for USE in set IVS to CP. */
4104 static void
4105 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4106 struct iv_use *use, struct cost_pair *cp)
4108 unsigned uid = use->id, cid;
4110 if (ivs->cand_for_use[uid] == cp)
4111 return;
4113 if (ivs->cand_for_use[uid])
4114 iv_ca_set_no_cp (data, ivs, use);
4116 if (cp)
4118 cid = cp->cand->id;
4120 ivs->bad_uses--;
4121 ivs->cand_for_use[uid] = cp;
4122 ivs->n_cand_uses[cid]++;
4123 if (ivs->n_cand_uses[cid] == 1)
4125 bitmap_set_bit (ivs->cands, cid);
4126 /* Do not count the pseudocandidates. */
4127 if (cp->cand->iv)
4128 ivs->n_regs++;
4129 ivs->n_cands++;
4130 ivs->cand_cost += cp->cand->cost;
4132 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4135 ivs->cand_use_cost += cp->cost;
4136 iv_ca_set_add_invariants (ivs, cp->depends_on);
4137 iv_ca_recount_cost (data, ivs);
4141 /* Extend set IVS by expressing USE by some of the candidates in it
4142 if possible. */
4144 static void
4145 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4146 struct iv_use *use)
4148 struct cost_pair *best_cp = NULL, *cp;
4149 bitmap_iterator bi;
4150 unsigned i;
4152 gcc_assert (ivs->upto >= use->id);
4154 if (ivs->upto == use->id)
4156 ivs->upto++;
4157 ivs->bad_uses++;
4160 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4162 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4164 if (cheaper_cost_pair (cp, best_cp))
4165 best_cp = cp;
4168 iv_ca_set_cp (data, ivs, use, best_cp);
4171 /* Get cost for assignment IVS. */
4173 static unsigned
4174 iv_ca_cost (struct iv_ca *ivs)
4176 return (ivs->bad_uses ? INFTY : ivs->cost);
4179 /* Returns true if all dependences of CP are among invariants in IVS. */
4181 static bool
4182 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4184 unsigned i;
4185 bitmap_iterator bi;
4187 if (!cp->depends_on)
4188 return true;
4190 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4192 if (ivs->n_invariant_uses[i] == 0)
4193 return false;
4196 return true;
4199 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4200 it before NEXT_CHANGE. */
4202 static struct iv_ca_delta *
4203 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4204 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4206 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4208 change->use = use;
4209 change->old_cp = old_cp;
4210 change->new_cp = new_cp;
4211 change->next_change = next_change;
4213 return change;
4216 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4217 are rewritten. */
4219 static struct iv_ca_delta *
4220 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4222 struct iv_ca_delta *last;
4224 if (!l2)
4225 return l1;
4227 if (!l1)
4228 return l2;
4230 for (last = l1; last->next_change; last = last->next_change)
4231 continue;
4232 last->next_change = l2;
4234 return l1;
4237 /* Returns candidate by that USE is expressed in IVS. */
4239 static struct cost_pair *
4240 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4242 return ivs->cand_for_use[use->id];
4245 /* Reverse the list of changes DELTA, forming the inverse to it. */
4247 static struct iv_ca_delta *
4248 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4250 struct iv_ca_delta *act, *next, *prev = NULL;
4251 struct cost_pair *tmp;
4253 for (act = delta; act; act = next)
4255 next = act->next_change;
4256 act->next_change = prev;
4257 prev = act;
4259 tmp = act->old_cp;
4260 act->old_cp = act->new_cp;
4261 act->new_cp = tmp;
4264 return prev;
4267 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4268 reverted instead. */
4270 static void
4271 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4272 struct iv_ca_delta *delta, bool forward)
4274 struct cost_pair *from, *to;
4275 struct iv_ca_delta *act;
4277 if (!forward)
4278 delta = iv_ca_delta_reverse (delta);
4280 for (act = delta; act; act = act->next_change)
4282 from = act->old_cp;
4283 to = act->new_cp;
4284 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4285 iv_ca_set_cp (data, ivs, act->use, to);
4288 if (!forward)
4289 iv_ca_delta_reverse (delta);
4292 /* Returns true if CAND is used in IVS. */
4294 static bool
4295 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4297 return ivs->n_cand_uses[cand->id] > 0;
4300 /* Returns number of induction variable candidates in the set IVS. */
4302 static unsigned
4303 iv_ca_n_cands (struct iv_ca *ivs)
4305 return ivs->n_cands;
4308 /* Free the list of changes DELTA. */
4310 static void
4311 iv_ca_delta_free (struct iv_ca_delta **delta)
4313 struct iv_ca_delta *act, *next;
4315 for (act = *delta; act; act = next)
4317 next = act->next_change;
4318 free (act);
4321 *delta = NULL;
4324 /* Allocates new iv candidates assignment. */
4326 static struct iv_ca *
4327 iv_ca_new (struct ivopts_data *data)
4329 struct iv_ca *nw = XNEW (struct iv_ca);
4331 nw->upto = 0;
4332 nw->bad_uses = 0;
4333 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4334 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4335 nw->cands = BITMAP_ALLOC (NULL);
4336 nw->n_cands = 0;
4337 nw->n_regs = 0;
4338 nw->cand_use_cost = 0;
4339 nw->cand_cost = 0;
4340 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4341 nw->cost = 0;
4343 return nw;
4346 /* Free memory occupied by the set IVS. */
4348 static void
4349 iv_ca_free (struct iv_ca **ivs)
4351 free ((*ivs)->cand_for_use);
4352 free ((*ivs)->n_cand_uses);
4353 BITMAP_FREE ((*ivs)->cands);
4354 free ((*ivs)->n_invariant_uses);
4355 free (*ivs);
4356 *ivs = NULL;
4359 /* Dumps IVS to FILE. */
4361 static void
4362 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4364 const char *pref = " invariants ";
4365 unsigned i;
4367 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4368 bitmap_print (file, ivs->cands, " candidates ","\n");
4370 for (i = 1; i <= data->max_inv_id; i++)
4371 if (ivs->n_invariant_uses[i])
4373 fprintf (file, "%s%d", pref, i);
4374 pref = ", ";
4376 fprintf (file, "\n");
4379 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4380 new set, and store differences in DELTA. Number of induction variables
4381 in the new set is stored to N_IVS. */
4383 static unsigned
4384 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4385 struct iv_cand *cand, struct iv_ca_delta **delta,
4386 unsigned *n_ivs)
4388 unsigned i, cost;
4389 struct iv_use *use;
4390 struct cost_pair *old_cp, *new_cp;
4392 *delta = NULL;
4393 for (i = 0; i < ivs->upto; i++)
4395 use = iv_use (data, i);
4396 old_cp = iv_ca_cand_for_use (ivs, use);
4398 if (old_cp
4399 && old_cp->cand == cand)
4400 continue;
4402 new_cp = get_use_iv_cost (data, use, cand);
4403 if (!new_cp)
4404 continue;
4406 if (!iv_ca_has_deps (ivs, new_cp))
4407 continue;
4409 if (!cheaper_cost_pair (new_cp, old_cp))
4410 continue;
4412 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4415 iv_ca_delta_commit (data, ivs, *delta, true);
4416 cost = iv_ca_cost (ivs);
4417 if (n_ivs)
4418 *n_ivs = iv_ca_n_cands (ivs);
4419 iv_ca_delta_commit (data, ivs, *delta, false);
4421 return cost;
4424 /* Try narrowing set IVS by removing CAND. Return the cost of
4425 the new set and store the differences in DELTA. */
4427 static unsigned
4428 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4429 struct iv_cand *cand, struct iv_ca_delta **delta)
4431 unsigned i, ci;
4432 struct iv_use *use;
4433 struct cost_pair *old_cp, *new_cp, *cp;
4434 bitmap_iterator bi;
4435 struct iv_cand *cnd;
4436 unsigned cost;
4438 *delta = NULL;
4439 for (i = 0; i < n_iv_uses (data); i++)
4441 use = iv_use (data, i);
4443 old_cp = iv_ca_cand_for_use (ivs, use);
4444 if (old_cp->cand != cand)
4445 continue;
4447 new_cp = NULL;
4449 if (data->consider_all_candidates)
4451 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4453 if (ci == cand->id)
4454 continue;
4456 cnd = iv_cand (data, ci);
4458 cp = get_use_iv_cost (data, use, cnd);
4459 if (!cp)
4460 continue;
4461 if (!iv_ca_has_deps (ivs, cp))
4462 continue;
4464 if (!cheaper_cost_pair (cp, new_cp))
4465 continue;
4467 new_cp = cp;
4470 else
4472 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4474 if (ci == cand->id)
4475 continue;
4477 cnd = iv_cand (data, ci);
4479 cp = get_use_iv_cost (data, use, cnd);
4480 if (!cp)
4481 continue;
4482 if (!iv_ca_has_deps (ivs, cp))
4483 continue;
4485 if (!cheaper_cost_pair (cp, new_cp))
4486 continue;
4488 new_cp = cp;
4492 if (!new_cp)
4494 iv_ca_delta_free (delta);
4495 return INFTY;
4498 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4501 iv_ca_delta_commit (data, ivs, *delta, true);
4502 cost = iv_ca_cost (ivs);
4503 iv_ca_delta_commit (data, ivs, *delta, false);
4505 return cost;
4508 /* Try optimizing the set of candidates IVS by removing candidates different
4509 from to EXCEPT_CAND from it. Return cost of the new set, and store
4510 differences in DELTA. */
4512 static unsigned
4513 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4514 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4516 bitmap_iterator bi;
4517 struct iv_ca_delta *act_delta, *best_delta;
4518 unsigned i, best_cost, acost;
4519 struct iv_cand *cand;
4521 best_delta = NULL;
4522 best_cost = iv_ca_cost (ivs);
4524 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4526 cand = iv_cand (data, i);
4528 if (cand == except_cand)
4529 continue;
4531 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4533 if (acost < best_cost)
4535 best_cost = acost;
4536 iv_ca_delta_free (&best_delta);
4537 best_delta = act_delta;
4539 else
4540 iv_ca_delta_free (&act_delta);
4543 if (!best_delta)
4545 *delta = NULL;
4546 return best_cost;
4549 /* Recurse to possibly remove other unnecessary ivs. */
4550 iv_ca_delta_commit (data, ivs, best_delta, true);
4551 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4552 iv_ca_delta_commit (data, ivs, best_delta, false);
4553 *delta = iv_ca_delta_join (best_delta, *delta);
4554 return best_cost;
4557 /* Tries to extend the sets IVS in the best possible way in order
4558 to express the USE. */
4560 static bool
4561 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4562 struct iv_use *use)
4564 unsigned best_cost, act_cost;
4565 unsigned i;
4566 bitmap_iterator bi;
4567 struct iv_cand *cand;
4568 struct iv_ca_delta *best_delta = NULL, *act_delta;
4569 struct cost_pair *cp;
4571 iv_ca_add_use (data, ivs, use);
4572 best_cost = iv_ca_cost (ivs);
4574 cp = iv_ca_cand_for_use (ivs, use);
4575 if (cp)
4577 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4578 iv_ca_set_no_cp (data, ivs, use);
4581 /* First try important candidates. Only if it fails, try the specific ones.
4582 Rationale -- in loops with many variables the best choice often is to use
4583 just one generic biv. If we added here many ivs specific to the uses,
4584 the optimization algorithm later would be likely to get stuck in a local
4585 minimum, thus causing us to create too many ivs. The approach from
4586 few ivs to more seems more likely to be successful -- starting from few
4587 ivs, replacing an expensive use by a specific iv should always be a
4588 win. */
4589 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4591 cand = iv_cand (data, i);
4593 if (iv_ca_cand_used_p (ivs, cand))
4594 continue;
4596 cp = get_use_iv_cost (data, use, cand);
4597 if (!cp)
4598 continue;
4600 iv_ca_set_cp (data, ivs, use, cp);
4601 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4602 iv_ca_set_no_cp (data, ivs, use);
4603 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4605 if (act_cost < best_cost)
4607 best_cost = act_cost;
4609 iv_ca_delta_free (&best_delta);
4610 best_delta = act_delta;
4612 else
4613 iv_ca_delta_free (&act_delta);
4616 if (best_cost == INFTY)
4618 for (i = 0; i < use->n_map_members; i++)
4620 cp = use->cost_map + i;
4621 cand = cp->cand;
4622 if (!cand)
4623 continue;
4625 /* Already tried this. */
4626 if (cand->important)
4627 continue;
4629 if (iv_ca_cand_used_p (ivs, cand))
4630 continue;
4632 act_delta = NULL;
4633 iv_ca_set_cp (data, ivs, use, cp);
4634 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4635 iv_ca_set_no_cp (data, ivs, use);
4636 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4637 cp, act_delta);
4639 if (act_cost < best_cost)
4641 best_cost = act_cost;
4643 if (best_delta)
4644 iv_ca_delta_free (&best_delta);
4645 best_delta = act_delta;
4647 else
4648 iv_ca_delta_free (&act_delta);
4652 iv_ca_delta_commit (data, ivs, best_delta, true);
4653 iv_ca_delta_free (&best_delta);
4655 return (best_cost != INFTY);
4658 /* Finds an initial assignment of candidates to uses. */
4660 static struct iv_ca *
4661 get_initial_solution (struct ivopts_data *data)
4663 struct iv_ca *ivs = iv_ca_new (data);
4664 unsigned i;
4666 for (i = 0; i < n_iv_uses (data); i++)
4667 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4669 iv_ca_free (&ivs);
4670 return NULL;
4673 return ivs;
4676 /* Tries to improve set of induction variables IVS. */
4678 static bool
4679 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4681 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4682 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4683 struct iv_cand *cand;
4685 /* Try extending the set of induction variables by one. */
4686 for (i = 0; i < n_iv_cands (data); i++)
4688 cand = iv_cand (data, i);
4690 if (iv_ca_cand_used_p (ivs, cand))
4691 continue;
4693 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4694 if (!act_delta)
4695 continue;
4697 /* If we successfully added the candidate and the set is small enough,
4698 try optimizing it by removing other candidates. */
4699 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4701 iv_ca_delta_commit (data, ivs, act_delta, true);
4702 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4703 iv_ca_delta_commit (data, ivs, act_delta, false);
4704 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4707 if (acost < best_cost)
4709 best_cost = acost;
4710 iv_ca_delta_free (&best_delta);
4711 best_delta = act_delta;
4713 else
4714 iv_ca_delta_free (&act_delta);
4717 if (!best_delta)
4719 /* Try removing the candidates from the set instead. */
4720 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4722 /* Nothing more we can do. */
4723 if (!best_delta)
4724 return false;
4727 iv_ca_delta_commit (data, ivs, best_delta, true);
4728 gcc_assert (best_cost == iv_ca_cost (ivs));
4729 iv_ca_delta_free (&best_delta);
4730 return true;
4733 /* Attempts to find the optimal set of induction variables. We do simple
4734 greedy heuristic -- we try to replace at most one candidate in the selected
4735 solution and remove the unused ivs while this improves the cost. */
4737 static struct iv_ca *
4738 find_optimal_iv_set (struct ivopts_data *data)
4740 unsigned i;
4741 struct iv_ca *set;
4742 struct iv_use *use;
4744 /* Get the initial solution. */
4745 set = get_initial_solution (data);
4746 if (!set)
4748 if (dump_file && (dump_flags & TDF_DETAILS))
4749 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4750 return NULL;
4753 if (dump_file && (dump_flags & TDF_DETAILS))
4755 fprintf (dump_file, "Initial set of candidates:\n");
4756 iv_ca_dump (data, dump_file, set);
4759 while (try_improve_iv_set (data, set))
4761 if (dump_file && (dump_flags & TDF_DETAILS))
4763 fprintf (dump_file, "Improved to:\n");
4764 iv_ca_dump (data, dump_file, set);
4768 if (dump_file && (dump_flags & TDF_DETAILS))
4769 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4771 for (i = 0; i < n_iv_uses (data); i++)
4773 use = iv_use (data, i);
4774 use->selected = iv_ca_cand_for_use (set, use)->cand;
4777 return set;
4780 /* Creates a new induction variable corresponding to CAND. */
4782 static void
4783 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4785 block_stmt_iterator incr_pos;
4786 tree base;
4787 bool after = false;
4789 if (!cand->iv)
4790 return;
4792 switch (cand->pos)
4794 case IP_NORMAL:
4795 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4796 break;
4798 case IP_END:
4799 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4800 after = true;
4801 break;
4803 case IP_ORIGINAL:
4804 /* Mark that the iv is preserved. */
4805 name_info (data, cand->var_before)->preserve_biv = true;
4806 name_info (data, cand->var_after)->preserve_biv = true;
4808 /* Rewrite the increment so that it uses var_before directly. */
4809 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4811 return;
4814 gimple_add_tmp_var (cand->var_before);
4815 add_referenced_var (cand->var_before);
4817 base = unshare_expr (cand->iv->base);
4819 create_iv (base, unshare_expr (cand->iv->step),
4820 cand->var_before, data->current_loop,
4821 &incr_pos, after, &cand->var_before, &cand->var_after);
4824 /* Creates new induction variables described in SET. */
4826 static void
4827 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4829 unsigned i;
4830 struct iv_cand *cand;
4831 bitmap_iterator bi;
4833 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4835 cand = iv_cand (data, i);
4836 create_new_iv (data, cand);
4840 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4841 is true, remove also the ssa name defined by the statement. */
4843 static void
4844 remove_statement (tree stmt, bool including_defined_name)
4846 if (TREE_CODE (stmt) == PHI_NODE)
4848 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4850 else
4852 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4854 bsi_remove (&bsi, true);
4855 release_defs (stmt);
4859 /* Rewrites USE (definition of iv used in a nonlinear expression)
4860 using candidate CAND. */
4862 static void
4863 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4864 struct iv_use *use, struct iv_cand *cand)
4866 tree comp;
4867 tree op, stmts, tgt, ass;
4868 block_stmt_iterator bsi, pbsi;
4870 /* An important special case -- if we are asked to express value of
4871 the original iv by itself, just exit; there is no need to
4872 introduce a new computation (that might also need casting the
4873 variable to unsigned and back). */
4874 if (cand->pos == IP_ORIGINAL
4875 && cand->incremented_at == use->stmt)
4877 tree step, ctype, utype;
4878 enum tree_code incr_code = PLUS_EXPR;
4880 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4881 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4883 step = cand->iv->step;
4884 ctype = TREE_TYPE (step);
4885 utype = TREE_TYPE (cand->var_after);
4886 if (TREE_CODE (step) == NEGATE_EXPR)
4888 incr_code = MINUS_EXPR;
4889 step = TREE_OPERAND (step, 0);
4892 /* Check whether we may leave the computation unchanged.
4893 This is the case only if it does not rely on other
4894 computations in the loop -- otherwise, the computation
4895 we rely upon may be removed in remove_unused_ivs,
4896 thus leading to ICE. */
4897 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4898 if (TREE_CODE (op) == PLUS_EXPR
4899 || TREE_CODE (op) == MINUS_EXPR)
4901 if (TREE_OPERAND (op, 0) == cand->var_before)
4902 op = TREE_OPERAND (op, 1);
4903 else if (TREE_CODE (op) == PLUS_EXPR
4904 && TREE_OPERAND (op, 1) == cand->var_before)
4905 op = TREE_OPERAND (op, 0);
4906 else
4907 op = NULL_TREE;
4909 else
4910 op = NULL_TREE;
4912 if (op
4913 && (TREE_CODE (op) == INTEGER_CST
4914 || operand_equal_p (op, step, 0)))
4915 return;
4917 /* Otherwise, add the necessary computations to express
4918 the iv. */
4919 op = fold_convert (ctype, cand->var_before);
4920 comp = fold_convert (utype,
4921 build2 (incr_code, ctype, op,
4922 unshare_expr (step)));
4924 else
4926 comp = get_computation (data->current_loop, use, cand);
4927 gcc_assert (comp != NULL_TREE);
4930 switch (TREE_CODE (use->stmt))
4932 case PHI_NODE:
4933 tgt = PHI_RESULT (use->stmt);
4935 /* If we should keep the biv, do not replace it. */
4936 if (name_info (data, tgt)->preserve_biv)
4937 return;
4939 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4940 while (!bsi_end_p (pbsi)
4941 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4943 bsi = pbsi;
4944 bsi_next (&pbsi);
4946 break;
4948 case GIMPLE_MODIFY_STMT:
4949 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4950 bsi = bsi_for_stmt (use->stmt);
4951 break;
4953 default:
4954 gcc_unreachable ();
4957 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4959 if (TREE_CODE (use->stmt) == PHI_NODE)
4961 if (stmts)
4962 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4963 ass = build_gimple_modify_stmt (tgt, op);
4964 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4965 remove_statement (use->stmt, false);
4966 SSA_NAME_DEF_STMT (tgt) = ass;
4968 else
4970 if (stmts)
4971 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4972 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4976 /* Replaces ssa name in index IDX by its basic variable. Callback for
4977 for_each_index. */
4979 static bool
4980 idx_remove_ssa_names (tree base, tree *idx,
4981 void *data ATTRIBUTE_UNUSED)
4983 tree *op;
4985 if (TREE_CODE (*idx) == SSA_NAME)
4986 *idx = SSA_NAME_VAR (*idx);
4988 if (TREE_CODE (base) == ARRAY_REF)
4990 op = &TREE_OPERAND (base, 2);
4991 if (*op
4992 && TREE_CODE (*op) == SSA_NAME)
4993 *op = SSA_NAME_VAR (*op);
4994 op = &TREE_OPERAND (base, 3);
4995 if (*op
4996 && TREE_CODE (*op) == SSA_NAME)
4997 *op = SSA_NAME_VAR (*op);
5000 return true;
5003 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5005 static tree
5006 unshare_and_remove_ssa_names (tree ref)
5008 ref = unshare_expr (ref);
5009 for_each_index (&ref, idx_remove_ssa_names, NULL);
5011 return ref;
5014 /* Extract the alias analysis info for the memory reference REF. There are
5015 several ways how this information may be stored and what precisely is
5016 its semantics depending on the type of the reference, but there always is
5017 somewhere hidden one _DECL node that is used to determine the set of
5018 virtual operands for the reference. The code below deciphers this jungle
5019 and extracts this single useful piece of information. */
5021 static tree
5022 get_ref_tag (tree ref, tree orig)
5024 tree var = get_base_address (ref);
5025 tree aref = NULL_TREE, tag, sv;
5026 HOST_WIDE_INT offset, size, maxsize;
/* Walk down the handled components of ORIG, recording the base/extent
   of each level in AREF.  */
5028 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5030 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
/* NOTE(review): this tests REF, which never changes in the loop, so if
   REF is non-null the body runs at most once.  It looks like `aref' may
   have been intended here -- confirm against this function's history
   before changing.  */
5031 if (ref)
5032 break;
/* If the base is an SSA variable that has sub-variables, hand back the
   (unshared) component reference at which we stopped.  */
5035 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5036 return unshare_expr (sv);
5038 if (!var)
5039 return NULL_TREE;
5041 if (TREE_CODE (var) == INDIRECT_REF)
5043 /* If the base is a dereference of a pointer, first check its name memory
5044 tag. If it does not have one, use its symbol memory tag. */
5045 var = TREE_OPERAND (var, 0);
5046 if (TREE_CODE (var) != SSA_NAME)
5047 return NULL_TREE;
5049 if (SSA_NAME_PTR_INFO (var))
5051 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5052 if (tag)
5053 return tag;
/* No name tag: fall back to the symbol memory tag of the pointer's
   underlying variable, which must exist.  */
5056 var = SSA_NAME_VAR (var);
5057 tag = symbol_mem_tag (var);
5058 gcc_assert (tag != NULL_TREE);
5059 return tag;
5061 else
/* The base is a declaration: prefer its symbol memory tag, otherwise
   return the decl itself.  */
5063 if (!DECL_P (var))
5064 return NULL_TREE;
5066 tag = symbol_mem_tag (var);
5067 if (tag)
5068 return tag;
5070 return var;
5074 /* Copies the reference information from OLD_REF to NEW_REF. */
5076 static void
5077 copy_ref_info (tree new_ref, tree old_ref)
5079 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5080 copy_mem_ref_info (new_ref, old_ref);
5081 else
5083 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5084 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5088 /* Rewrites USE (address that is an iv) using candidate CAND. */
5090 static void
5091 rewrite_use_address (struct ivopts_data *data,
5092 struct iv_use *use, struct iv_cand *cand)
5094 aff_tree aff;
5095 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5096 tree ref;
5097 bool ok;
5099 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5100 gcc_assert (ok);
5101 unshare_aff_combination (&aff);
5103 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5104 copy_ref_info (ref, *use->op_p);
5105 *use->op_p = ref;
5108 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5109 candidate CAND. */
5111 static void
5112 rewrite_use_compare (struct ivopts_data *data,
5113 struct iv_use *use, struct iv_cand *cand)
5115 tree comp, *var_p, op, bound;
5116 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5117 enum tree_code compare;
5118 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5119 bool ok;
5121 bound = cp->value;
5122 if (bound)
5124 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5125 tree var_type = TREE_TYPE (var);
5127 compare = iv_elimination_compare (data, use);
5128 bound = unshare_expr (fold_convert (var_type, bound));
5129 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);
5131 *use->op_p = build2 (compare, boolean_type_node, var, op);
5132 return;
5135 /* The induction variable elimination failed; just express the original
5136 giv. */
5137 comp = get_computation (data->current_loop, use, cand);
5138 gcc_assert (comp != NULL_TREE);
5140 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5141 gcc_assert (ok);
5143 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
5146 /* Rewrites USE using candidate CAND. */
5148 static void
5149 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5151 push_stmt_changes (&use->stmt);
5153 switch (use->type)
5155 case USE_NONLINEAR_EXPR:
5156 rewrite_use_nonlinear_expr (data, use, cand);
5157 break;
5159 case USE_ADDRESS:
5160 rewrite_use_address (data, use, cand);
5161 break;
5163 case USE_COMPARE:
5164 rewrite_use_compare (data, use, cand);
5165 break;
5167 default:
5168 gcc_unreachable ();
5171 pop_stmt_changes (&use->stmt);
5174 /* Rewrite the uses using the selected induction variables. */
5176 static void
5177 rewrite_uses (struct ivopts_data *data)
5179 unsigned i;
5180 struct iv_cand *cand;
5181 struct iv_use *use;
5183 for (i = 0; i < n_iv_uses (data); i++)
5185 use = iv_use (data, i);
5186 cand = use->selected;
5187 gcc_assert (cand);
5189 rewrite_use (data, use, cand);
5193 /* Removes the ivs that are not used after rewriting. */
5195 static void
5196 remove_unused_ivs (struct ivopts_data *data)
5198 unsigned j;
5199 bitmap_iterator bi;
5201 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5203 struct version_info *info;
5205 info = ver_info (data, j);
5206 if (info->iv
5207 && !integer_zerop (info->iv->step)
5208 && !info->inv_id
5209 && !info->iv->have_use_for
5210 && !info->preserve_biv)
5211 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5215 /* Frees data allocated by the optimization of a single loop. */
5217 static void
5218 free_loop_data (struct ivopts_data *data)
5220 unsigned i, j;
5221 bitmap_iterator bi;
5222 tree obj;
5224 if (data->niters)
5226 pointer_map_destroy (data->niters);
5227 data->niters = NULL;
5230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5232 struct version_info *info;
5234 info = ver_info (data, i);
5235 if (info->iv)
5236 free (info->iv);
5237 info->iv = NULL;
5238 info->has_nonlin_use = false;
5239 info->preserve_biv = false;
5240 info->inv_id = 0;
5242 bitmap_clear (data->relevant);
5243 bitmap_clear (data->important_candidates);
5245 for (i = 0; i < n_iv_uses (data); i++)
5247 struct iv_use *use = iv_use (data, i);
5249 free (use->iv);
5250 BITMAP_FREE (use->related_cands);
5251 for (j = 0; j < use->n_map_members; j++)
5252 if (use->cost_map[j].depends_on)
5253 BITMAP_FREE (use->cost_map[j].depends_on);
5254 free (use->cost_map);
5255 free (use);
5257 VEC_truncate (iv_use_p, data->iv_uses, 0);
5259 for (i = 0; i < n_iv_cands (data); i++)
5261 struct iv_cand *cand = iv_cand (data, i);
5263 if (cand->iv)
5264 free (cand->iv);
5265 if (cand->depends_on)
5266 BITMAP_FREE (cand->depends_on);
5267 free (cand);
5269 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5271 if (data->version_info_size < num_ssa_names)
5273 data->version_info_size = 2 * num_ssa_names;
5274 free (data->version_info);
5275 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5278 data->max_inv_id = 0;
5280 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5281 SET_DECL_RTL (obj, NULL_RTX);
5283 VEC_truncate (tree, decl_rtl_to_reset, 0);
5286 /* Finalizes data structures used by the iv optimization pass.  */
5289 static void
5290 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
/* Release per-loop data first, then the pass-global structures.  */
5292 free_loop_data (data);
5293 free (data->version_info);
5294 BITMAP_FREE (data->relevant);
5295 BITMAP_FREE (data->important_candidates);
5297 VEC_free (tree, heap, decl_rtl_to_reset);
5298 VEC_free (iv_use_p, heap, data->iv_uses);
5299 VEC_free (iv_cand_p, heap, data->iv_candidates);
5302 /* Optimizes the LOOP. Returns true if anything changed. */
5304 static bool
5305 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5307 bool changed = false;
5308 struct iv_ca *iv_ca;
5309 edge exit;
/* free_loop_data must have been run on DATA before processing
   another loop.  */
5311 gcc_assert (!data->niters);
5312 data->current_loop = loop;
/* EXIT is only used for the detailed dump below.  */
5314 if (dump_file && (dump_flags & TDF_DETAILS))
5316 fprintf (dump_file, "Processing loop %d\n", loop->num);
5318 exit = single_dom_exit (loop);
5319 if (exit)
5321 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5322 exit->src->index, exit->dest->index);
5323 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5324 fprintf (dump_file, "\n");
5327 fprintf (dump_file, "\n");
5330 /* For each ssa name determines whether it behaves as an induction variable
5331 in some loop. */
5332 if (!find_induction_variables (data))
5333 goto finish;
5335 /* Finds interesting uses (item 1). */
5336 find_interesting_uses (data);
/* Give up on loops with too many uses; the cost computation below
   would be too expensive.  */
5337 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5338 goto finish;
5340 /* Finds candidates for the induction variables (item 2). */
5341 find_iv_candidates (data);
5343 /* Calculates the costs (item 3, part 1). */
5344 determine_use_iv_costs (data);
5345 determine_iv_costs (data);
5346 determine_set_costs (data);
5348 /* Find the optimal set of induction variables (item 3, part 2). */
5349 iv_ca = find_optimal_iv_set (data);
5350 if (!iv_ca)
5351 goto finish;
5352 changed = true;
5354 /* Create the new induction variables (item 4, part 1). */
5355 create_new_ivs (data, iv_ca);
5356 iv_ca_free (&iv_ca);
5358 /* Rewrite the uses (item 4, part 2). */
5359 rewrite_uses (data);
5361 /* Remove the ivs that are unused after rewriting. */
5362 remove_unused_ivs (data);
5364 /* We have changed the structure of induction variables; it might happen
5365 that definitions in the scev database refer to some of them that were
5366 eliminated. */
5367 scev_reset ();
/* Per-loop data is released on every path, successful or not.  */
5369 finish:
5370 free_loop_data (data);
5372 return changed;
5375 /* Main entry point. Optimizes induction variables in loops. */
5377 void
5378 tree_ssa_iv_optimize (void)
5380 struct loop *loop;
5381 struct ivopts_data data;
5382 loop_iterator li;
5384 tree_ssa_iv_optimize_init (&data);
5386 /* Optimize the loops starting with the innermost ones. */
5387 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5389 if (dump_file && (dump_flags & TDF_DETAILS))
5390 flow_loop_dump (loop, dump_file, NULL, 1);
5392 tree_ssa_iv_optimize_loop (&data, loop);
/* Release the pass-global data once all loops are processed.  */
5395 tree_ssa_iv_optimize_finalize (&data);