2008-01-26 Jerry DeLisle <jvdelisle@gcc.gnu.org>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blobc8cefd418a195f66ea7920e4b0878f0735519f73
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "rtl.h"
70 #include "tm_p.h"
71 #include "hard-reg-set.h"
72 #include "basic-block.h"
73 #include "output.h"
74 #include "diagnostic.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "varray.h"
80 #include "expr.h"
81 #include "tree-pass.h"
82 #include "ggc.h"
83 #include "insn-config.h"
84 #include "recog.h"
85 #include "pointer-set.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93 #include "target.h"
95 /* The infinite cost. */
96 #define INFTY 10000000
98 /* The expected number of loop iterations. TODO -- use profiling instead of
99 this. */
100 #define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
126 /* Types of uses. */
127 enum use_type
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
134 /* The candidate - cost pair. */
135 struct cost_pair
137 struct iv_cand *cand; /* The candidate. */
138 unsigned cost; /* The cost. */
139 bitmap depends_on; /* The list of invariants that have to be
140 preserved. */
141 tree value; /* For final value elimination, the expression for
142 the final value of the iv. For iv elimination,
143 the new bound to compare with. */
146 /* Use. */
147 struct iv_use
149 unsigned id; /* The id of the use. */
150 enum use_type type; /* Type of the use. */
151 struct iv *iv; /* The induction variable it is based on. */
152 tree stmt; /* Statement in that it occurs. */
153 tree *op_p; /* The place where it occurs. */
154 bitmap related_cands; /* The set of "related" iv candidates, plus the common
155 important ones. */
157 unsigned n_map_members; /* Number of candidates in the cost_map list. */
158 struct cost_pair *cost_map;
159 /* The costs wrto the iv candidates. */
161 struct iv_cand *selected;
162 /* The selected candidate. */
165 /* The position where the iv is computed. */
166 enum iv_position
168 IP_NORMAL, /* At the end, just before the exit condition. */
169 IP_END, /* At the end of the latch block. */
170 IP_ORIGINAL /* The original biv. */
173 /* The induction variable candidate. */
174 struct iv_cand
176 unsigned id; /* The number of the candidate. */
177 bool important; /* Whether this is an "important" candidate, i.e. such
178 that it should be considered by all uses. */
179 enum iv_position pos; /* Where it is computed. */
180 tree incremented_at; /* For original biv, the statement where it is
181 incremented. */
182 tree var_before; /* The variable used for it before increment. */
183 tree var_after; /* The variable used for it after increment. */
184 struct iv *iv; /* The value of the candidate. NULL for
185 "pseudocandidate" used to indicate the possibility
186 to replace the final value of an iv by direct
187 computation of the value. */
188 unsigned cost; /* Cost of the candidate. */
189 bitmap depends_on; /* The list of invariants that are used in step of the
190 biv. */
193 /* The data used by the induction variable optimizations. */
195 typedef struct iv_use *iv_use_p;
196 DEF_VEC_P(iv_use_p);
197 DEF_VEC_ALLOC_P(iv_use_p,heap);
199 typedef struct iv_cand *iv_cand_p;
200 DEF_VEC_P(iv_cand_p);
201 DEF_VEC_ALLOC_P(iv_cand_p,heap);
203 struct ivopts_data
205 /* The currently optimized loop. */
206 struct loop *current_loop;
208 /* Number of registers used in it. */
209 unsigned regs_used;
211 /* Numbers of iterations for all exits of the current loop. */
212 struct pointer_map_t *niters;
214 /* The size of version_info array allocated. */
215 unsigned version_info_size;
217 /* The array of information for the ssa names. */
218 struct version_info *version_info;
220 /* The bitmap of indices in version_info whose value was changed. */
221 bitmap relevant;
223 /* The maximum invariant id. */
224 unsigned max_inv_id;
226 /* The uses of induction variables. */
227 VEC(iv_use_p,heap) *iv_uses;
229 /* The candidates. */
230 VEC(iv_cand_p,heap) *iv_candidates;
232 /* A bitmap of important candidates. */
233 bitmap important_candidates;
235 /* Whether to consider just related and important candidates when replacing a
236 use. */
237 bool consider_all_candidates;
240 /* An assignment of iv candidates to uses. */
242 struct iv_ca
244 /* The number of uses covered by the assignment. */
245 unsigned upto;
247 /* Number of uses that cannot be expressed by the candidates in the set. */
248 unsigned bad_uses;
250 /* Candidate assigned to a use, together with the related costs. */
251 struct cost_pair **cand_for_use;
253 /* Number of times each candidate is used. */
254 unsigned *n_cand_uses;
256 /* The candidates used. */
257 bitmap cands;
259 /* The number of candidates in the set. */
260 unsigned n_cands;
262 /* Total number of registers needed. */
263 unsigned n_regs;
265 /* Total cost of expressing uses. */
266 unsigned cand_use_cost;
268 /* Total cost of candidates. */
269 unsigned cand_cost;
271 /* Number of times each invariant is used. */
272 unsigned *n_invariant_uses;
274 /* Total cost of the assignment. */
275 unsigned cost;
278 /* Difference of two iv candidate assignments. */
280 struct iv_ca_delta
282 /* Changed use. */
283 struct iv_use *use;
285 /* An old assignment (for rollback purposes). */
286 struct cost_pair *old_cp;
288 /* A new assignment. */
289 struct cost_pair *new_cp;
291 /* Next change in the list. */
292 struct iv_ca_delta *next_change;
295 /* Bound on number of candidates below that all candidates are considered. */
297 #define CONSIDER_ALL_CANDIDATES_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
300 /* If there are more iv occurrences, we just give up (it is quite unlikely that
301 optimizing such a loop would help, and it would take ages). */
303 #define MAX_CONSIDERED_USES \
304 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
306 /* If there are at most this number of ivs in the set, try removing unnecessary
307 ivs from the set always. */
309 #define ALWAYS_PRUNE_CAND_SET_BOUND \
310 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
312 /* The list of trees for that the decl_rtl field must be reset is stored
313 here. */
315 static VEC(tree,heap) *decl_rtl_to_reset;
317 /* Number of uses recorded in DATA. */
319 static inline unsigned
320 n_iv_uses (struct ivopts_data *data)
322 return VEC_length (iv_use_p, data->iv_uses);
325 /* Ith use recorded in DATA. */
327 static inline struct iv_use *
328 iv_use (struct ivopts_data *data, unsigned i)
330 return VEC_index (iv_use_p, data->iv_uses, i);
333 /* Number of candidates recorded in DATA. */
335 static inline unsigned
336 n_iv_cands (struct ivopts_data *data)
338 return VEC_length (iv_cand_p, data->iv_candidates);
341 /* Ith candidate recorded in DATA. */
343 static inline struct iv_cand *
344 iv_cand (struct ivopts_data *data, unsigned i)
346 return VEC_index (iv_cand_p, data->iv_candidates, i);
349 /* The single loop exit if it dominates the latch, NULL otherwise. */
351 edge
352 single_dom_exit (struct loop *loop)
354 edge exit = single_exit (loop);
356 if (!exit)
357 return NULL;
359 if (!just_once_each_iteration_p (loop, exit->src))
360 return NULL;
362 return exit;
365 /* Dumps information about the induction variable IV to FILE. */
367 extern void dump_iv (FILE *, struct iv *);
368 void
369 dump_iv (FILE *file, struct iv *iv)
371 if (iv->ssa_name)
373 fprintf (file, "ssa name ");
374 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
375 fprintf (file, "\n");
378 fprintf (file, " type ");
379 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
380 fprintf (file, "\n");
382 if (iv->step)
384 fprintf (file, " base ");
385 print_generic_expr (file, iv->base, TDF_SLIM);
386 fprintf (file, "\n");
388 fprintf (file, " step ");
389 print_generic_expr (file, iv->step, TDF_SLIM);
390 fprintf (file, "\n");
392 else
394 fprintf (file, " invariant ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
399 if (iv->base_object)
401 fprintf (file, " base object ");
402 print_generic_expr (file, iv->base_object, TDF_SLIM);
403 fprintf (file, "\n");
406 if (iv->biv_p)
407 fprintf (file, " is a biv\n");
410 /* Dumps information about the USE to FILE. */
412 extern void dump_use (FILE *, struct iv_use *);
413 void
414 dump_use (FILE *file, struct iv_use *use)
416 fprintf (file, "use %d\n", use->id);
418 switch (use->type)
420 case USE_NONLINEAR_EXPR:
421 fprintf (file, " generic\n");
422 break;
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
432 default:
433 gcc_unreachable ();
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
445 dump_iv (file, use->iv);
447 if (use->related_cands)
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
460 unsigned i;
461 struct iv_use *use;
463 for (i = 0; i < n_iv_uses (data); i++)
465 use = iv_use (data, i);
467 dump_use (file, use);
468 fprintf (file, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
478 struct iv *iv = cand->iv;
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
483 if (cand->depends_on)
485 fprintf (file, " depends on ");
486 dump_bitmap (file, cand->depends_on);
489 if (!iv)
491 fprintf (file, " final value replacement\n");
492 return;
495 switch (cand->pos)
497 case IP_NORMAL:
498 fprintf (file, " incremented before exit test\n");
499 break;
501 case IP_END:
502 fprintf (file, " incremented at end\n");
503 break;
505 case IP_ORIGINAL:
506 fprintf (file, " original biv\n");
507 break;
510 dump_iv (file, iv);
513 /* Returns the info for ssa version VER. */
515 static inline struct version_info *
516 ver_info (struct ivopts_data *data, unsigned ver)
518 return data->version_info + ver;
521 /* Returns the info for ssa name NAME. */
523 static inline struct version_info *
524 name_info (struct ivopts_data *data, tree name)
526 return ver_info (data, SSA_NAME_VERSION (name));
529 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
530 emitted in LOOP. */
532 static bool
533 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
535 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
537 gcc_assert (bb);
539 if (sbb == loop->latch)
540 return true;
542 if (sbb != bb)
543 return false;
545 return stmt == last_stmt (bb);
548 /* Returns true if STMT if after the place where the original induction
549 variable CAND is incremented. */
551 static bool
552 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
554 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
555 basic_block stmt_bb = bb_for_stmt (stmt);
556 block_stmt_iterator bsi;
558 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
559 return false;
561 if (stmt_bb != cand_bb)
562 return true;
564 /* Scan the block from the end, since the original ivs are usually
565 incremented at the end of the loop body. */
566 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
568 if (bsi_stmt (bsi) == cand->incremented_at)
569 return false;
570 if (bsi_stmt (bsi) == stmt)
571 return true;
575 /* Returns true if STMT if after the place where the induction variable
576 CAND is incremented in LOOP. */
578 static bool
579 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
581 switch (cand->pos)
583 case IP_END:
584 return false;
586 case IP_NORMAL:
587 return stmt_after_ip_normal_pos (loop, stmt);
589 case IP_ORIGINAL:
590 return stmt_after_ip_original_pos (cand, stmt);
592 default:
593 gcc_unreachable ();
597 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
599 static bool
600 abnormal_ssa_name_p (tree exp)
602 if (!exp)
603 return false;
605 if (TREE_CODE (exp) != SSA_NAME)
606 return false;
608 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
611 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
612 abnormal phi node. Callback for for_each_index. */
614 static bool
615 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
616 void *data ATTRIBUTE_UNUSED)
618 if (TREE_CODE (base) == ARRAY_REF)
620 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
621 return false;
622 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
623 return false;
626 return !abnormal_ssa_name_p (*index);
629 /* Returns true if EXPR contains a ssa name that occurs in an
630 abnormal phi node. */
632 bool
633 contains_abnormal_ssa_name_p (tree expr)
635 enum tree_code code;
636 enum tree_code_class codeclass;
638 if (!expr)
639 return false;
641 code = TREE_CODE (expr);
642 codeclass = TREE_CODE_CLASS (code);
644 if (code == SSA_NAME)
645 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
647 if (code == INTEGER_CST
648 || is_gimple_min_invariant (expr))
649 return false;
651 if (code == ADDR_EXPR)
652 return !for_each_index (&TREE_OPERAND (expr, 0),
653 idx_contains_abnormal_ssa_name_p,
654 NULL);
656 switch (codeclass)
658 case tcc_binary:
659 case tcc_comparison:
660 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
661 return true;
663 /* Fallthru. */
664 case tcc_unary:
665 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
666 return true;
668 break;
670 default:
671 gcc_unreachable ();
674 return false;
677 /* Returns tree describing number of iterations determined from
678 EXIT of DATA->current_loop, or NULL if something goes wrong. */
680 static tree
681 niter_for_exit (struct ivopts_data *data, edge exit)
683 struct tree_niter_desc desc;
684 tree niter;
685 void **slot;
687 if (!data->niters)
689 data->niters = pointer_map_create ();
690 slot = NULL;
692 else
693 slot = pointer_map_contains (data->niters, exit);
695 if (!slot)
697 /* Try to determine number of iterations. We must know it
698 unconditionally (i.e., without possibility of # of iterations
699 being zero). Also, we cannot safely work with ssa names that
700 appear in phi nodes on abnormal edges, so that we do not create
701 overlapping life ranges for them (PR 27283). */
702 if (number_of_iterations_exit (data->current_loop,
703 exit, &desc, true)
704 && integer_zerop (desc.may_be_zero)
705 && !contains_abnormal_ssa_name_p (desc.niter))
706 niter = desc.niter;
707 else
708 niter = NULL_TREE;
710 *pointer_map_insert (data->niters, exit) = niter;
712 else
713 niter = (tree) *slot;
715 return niter;
718 /* Returns tree describing number of iterations determined from
719 single dominating exit of DATA->current_loop, or NULL if something
720 goes wrong. */
722 static tree
723 niter_for_single_dom_exit (struct ivopts_data *data)
725 edge exit = single_dom_exit (data->current_loop);
727 if (!exit)
728 return NULL;
730 return niter_for_exit (data, exit);
733 /* Initializes data structures used by the iv optimization pass, stored
734 in DATA. */
736 static void
737 tree_ssa_iv_optimize_init (struct ivopts_data *data)
739 data->version_info_size = 2 * num_ssa_names;
740 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
741 data->relevant = BITMAP_ALLOC (NULL);
742 data->important_candidates = BITMAP_ALLOC (NULL);
743 data->max_inv_id = 0;
744 data->niters = NULL;
745 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
746 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
747 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
750 /* Returns a memory object to that EXPR points. In case we are able to
751 determine that it does not point to any such object, NULL is returned. */
753 static tree
754 determine_base_object (tree expr)
756 enum tree_code code = TREE_CODE (expr);
757 tree base, obj;
759 /* If this is a pointer casted to any type, we need to determine
760 the base object for the pointer; so handle conversions before
761 throwing away non-pointer expressions. */
762 if (TREE_CODE (expr) == NOP_EXPR
763 || TREE_CODE (expr) == CONVERT_EXPR)
764 return determine_base_object (TREE_OPERAND (expr, 0));
766 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
767 return NULL_TREE;
769 switch (code)
771 case INTEGER_CST:
772 return NULL_TREE;
774 case ADDR_EXPR:
775 obj = TREE_OPERAND (expr, 0);
776 base = get_base_address (obj);
778 if (!base)
779 return expr;
781 if (TREE_CODE (base) == INDIRECT_REF)
782 return determine_base_object (TREE_OPERAND (base, 0));
784 return fold_convert (ptr_type_node,
785 build_fold_addr_expr (base));
787 case POINTER_PLUS_EXPR:
788 return determine_base_object (TREE_OPERAND (expr, 0));
790 case PLUS_EXPR:
791 case MINUS_EXPR:
792 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
793 gcc_unreachable ();
795 default:
796 return fold_convert (ptr_type_node, expr);
800 /* Allocates an induction variable with given initial value BASE and step STEP
801 for loop LOOP. */
803 static struct iv *
804 alloc_iv (tree base, tree step)
806 struct iv *iv = XCNEW (struct iv);
807 gcc_assert (step != NULL_TREE);
809 iv->base = base;
810 iv->base_object = determine_base_object (base);
811 iv->step = step;
812 iv->biv_p = false;
813 iv->have_use_for = false;
814 iv->use_id = 0;
815 iv->ssa_name = NULL_TREE;
817 return iv;
820 /* Sets STEP and BASE for induction variable IV. */
822 static void
823 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
825 struct version_info *info = name_info (data, iv);
827 gcc_assert (!info->iv);
829 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
830 info->iv = alloc_iv (base, step);
831 info->iv->ssa_name = iv;
834 /* Finds induction variable declaration for VAR. */
836 static struct iv *
837 get_iv (struct ivopts_data *data, tree var)
839 basic_block bb;
840 tree type = TREE_TYPE (var);
842 if (!POINTER_TYPE_P (type)
843 && !INTEGRAL_TYPE_P (type))
844 return NULL;
846 if (!name_info (data, var)->iv)
848 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
850 if (!bb
851 || !flow_bb_inside_loop_p (data->current_loop, bb))
852 set_iv (data, var, var, build_int_cst (type, 0));
855 return name_info (data, var)->iv;
858 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
859 not define a simple affine biv with nonzero step. */
861 static tree
862 determine_biv_step (tree phi)
864 struct loop *loop = bb_for_stmt (phi)->loop_father;
865 tree name = PHI_RESULT (phi);
866 affine_iv iv;
868 if (!is_gimple_reg (name))
869 return NULL_TREE;
871 if (!simple_iv (loop, phi, name, &iv, true))
872 return NULL_TREE;
874 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
877 /* Finds basic ivs. */
879 static bool
880 find_bivs (struct ivopts_data *data)
882 tree phi, step, type, base;
883 bool found = false;
884 struct loop *loop = data->current_loop;
886 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
888 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
889 continue;
891 step = determine_biv_step (phi);
892 if (!step)
893 continue;
895 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
896 base = expand_simple_operations (base);
897 if (contains_abnormal_ssa_name_p (base)
898 || contains_abnormal_ssa_name_p (step))
899 continue;
901 type = TREE_TYPE (PHI_RESULT (phi));
902 base = fold_convert (type, base);
903 if (step)
904 step = fold_convert (type, step);
906 set_iv (data, PHI_RESULT (phi), base, step);
907 found = true;
910 return found;
913 /* Marks basic ivs. */
915 static void
916 mark_bivs (struct ivopts_data *data)
918 tree phi, var;
919 struct iv *iv, *incr_iv;
920 struct loop *loop = data->current_loop;
921 basic_block incr_bb;
923 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
925 iv = get_iv (data, PHI_RESULT (phi));
926 if (!iv)
927 continue;
929 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
930 incr_iv = get_iv (data, var);
931 if (!incr_iv)
932 continue;
934 /* If the increment is in the subloop, ignore it. */
935 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
936 if (incr_bb->loop_father != data->current_loop
937 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
938 continue;
940 iv->biv_p = true;
941 incr_iv->biv_p = true;
945 /* Checks whether STMT defines a linear induction variable and stores its
946 parameters to IV. */
948 static bool
949 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
951 tree lhs;
952 struct loop *loop = data->current_loop;
954 iv->base = NULL_TREE;
955 iv->step = NULL_TREE;
957 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
958 return false;
960 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
961 if (TREE_CODE (lhs) != SSA_NAME)
962 return false;
964 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
965 return false;
966 iv->base = expand_simple_operations (iv->base);
968 if (contains_abnormal_ssa_name_p (iv->base)
969 || contains_abnormal_ssa_name_p (iv->step))
970 return false;
972 return true;
975 /* Finds general ivs in statement STMT. */
977 static void
978 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
980 affine_iv iv;
982 if (!find_givs_in_stmt_scev (data, stmt, &iv))
983 return;
985 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
988 /* Finds general ivs in basic block BB. */
990 static void
991 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
993 block_stmt_iterator bsi;
995 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
996 find_givs_in_stmt (data, bsi_stmt (bsi));
999 /* Finds general ivs. */
1001 static void
1002 find_givs (struct ivopts_data *data)
1004 struct loop *loop = data->current_loop;
1005 basic_block *body = get_loop_body_in_dom_order (loop);
1006 unsigned i;
1008 for (i = 0; i < loop->num_nodes; i++)
1009 find_givs_in_bb (data, body[i]);
1010 free (body);
1013 /* For each ssa name defined in LOOP determines whether it is an induction
1014 variable and if so, its initial value and step. */
1016 static bool
1017 find_induction_variables (struct ivopts_data *data)
1019 unsigned i;
1020 bitmap_iterator bi;
1022 if (!find_bivs (data))
1023 return false;
1025 find_givs (data);
1026 mark_bivs (data);
1028 if (dump_file && (dump_flags & TDF_DETAILS))
1030 tree niter = niter_for_single_dom_exit (data);
1032 if (niter)
1034 fprintf (dump_file, " number of iterations ");
1035 print_generic_expr (dump_file, niter, TDF_SLIM);
1036 fprintf (dump_file, "\n\n");
1039 fprintf (dump_file, "Induction variables:\n\n");
1041 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1043 if (ver_info (data, i)->iv)
1044 dump_iv (dump_file, ver_info (data, i)->iv);
1048 return true;
1051 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1053 static struct iv_use *
1054 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1055 tree stmt, enum use_type use_type)
1057 struct iv_use *use = XCNEW (struct iv_use);
1059 use->id = n_iv_uses (data);
1060 use->type = use_type;
1061 use->iv = iv;
1062 use->stmt = stmt;
1063 use->op_p = use_p;
1064 use->related_cands = BITMAP_ALLOC (NULL);
1066 /* To avoid showing ssa name in the dumps, if it was not reset by the
1067 caller. */
1068 iv->ssa_name = NULL_TREE;
1070 if (dump_file && (dump_flags & TDF_DETAILS))
1071 dump_use (dump_file, use);
1073 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1075 return use;
1078 /* Checks whether OP is a loop-level invariant and if so, records it.
1079 NONLINEAR_USE is true if the invariant is used in a way we do not
1080 handle specially. */
1082 static void
1083 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1085 basic_block bb;
1086 struct version_info *info;
1088 if (TREE_CODE (op) != SSA_NAME
1089 || !is_gimple_reg (op))
1090 return;
1092 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1093 if (bb
1094 && flow_bb_inside_loop_p (data->current_loop, bb))
1095 return;
1097 info = name_info (data, op);
1098 info->name = op;
1099 info->has_nonlin_use |= nonlinear_use;
1100 if (!info->inv_id)
1101 info->inv_id = ++data->max_inv_id;
1102 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1105 /* Checks whether the use OP is interesting and if so, records it. */
1107 static struct iv_use *
1108 find_interesting_uses_op (struct ivopts_data *data, tree op)
1110 struct iv *iv;
1111 struct iv *civ;
1112 tree stmt;
1113 struct iv_use *use;
1115 if (TREE_CODE (op) != SSA_NAME)
1116 return NULL;
1118 iv = get_iv (data, op);
1119 if (!iv)
1120 return NULL;
1122 if (iv->have_use_for)
1124 use = iv_use (data, iv->use_id);
1126 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1127 return use;
1130 if (integer_zerop (iv->step))
1132 record_invariant (data, op, true);
1133 return NULL;
1135 iv->have_use_for = true;
1137 civ = XNEW (struct iv);
1138 *civ = *iv;
1140 stmt = SSA_NAME_DEF_STMT (op);
1141 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1142 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1144 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1145 iv->use_id = use->id;
1147 return use;
1150 /* Given a condition *COND_P, checks whether it is a compare of an induction
1151 variable and an invariant. If this is the case, CONTROL_VAR is set
1152 to location of the iv, BOUND to the location of the invariant,
1153 IV_VAR and IV_BOUND are set to the corresponding induction variable
1154 descriptions, and true is returned. If this is not the case,
1155 CONTROL_VAR and BOUND are set to the arguments of the condition and
1156 false is returned. */
1158 static bool
1159 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1160 tree **control_var, tree **bound,
1161 struct iv **iv_var, struct iv **iv_bound)
1163 /* The nodes returned when COND has just one operand. Note that you should
1164 not modify anything in BOUND or IV_BOUND because of this. */
1165 static struct iv const_iv;
1166 static tree zero;
1167 tree cond = *cond_p;
1168 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1169 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1170 bool ret = false;
1172 zero = integer_zero_node;
1173 const_iv.step = integer_zero_node;
1175 if (TREE_CODE (cond) == SSA_NAME)
1177 op0 = cond_p;
1178 iv0 = get_iv (data, cond);
1179 ret = (iv0 && !integer_zerop (iv0->step));
1180 goto end;
1183 if (!COMPARISON_CLASS_P (cond))
1185 op0 = cond_p;
1186 goto end;
1189 op0 = &TREE_OPERAND (cond, 0);
1190 op1 = &TREE_OPERAND (cond, 1);
1191 if (TREE_CODE (*op0) == SSA_NAME)
1192 iv0 = get_iv (data, *op0);
1193 if (TREE_CODE (*op1) == SSA_NAME)
1194 iv1 = get_iv (data, *op1);
1196 /* Exactly one of the compared values must be an iv, and the other one must
1197 be an invariant. */
1198 if (!iv0 || !iv1)
1199 goto end;
1201 if (integer_zerop (iv0->step))
1203 /* Control variable may be on the other side. */
1204 tmp_op = op0; op0 = op1; op1 = tmp_op;
1205 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1207 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1209 end:
1210 if (control_var)
1211 *control_var = op0;;
1212 if (iv_var)
1213 *iv_var = iv0;;
1214 if (bound)
1215 *bound = op1;
1216 if (iv_bound)
1217 *iv_bound = iv1;
1219 return ret;
1222 /* Checks whether the condition *COND_P in STMT is interesting
1223 and if so, records it. */
1225 static void
1226 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1228 tree *var_p, *bound_p;
1229 struct iv *var_iv, *civ;
1231 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1233 find_interesting_uses_op (data, *var_p);
1234 find_interesting_uses_op (data, *bound_p);
1235 return;
1238 civ = XNEW (struct iv);
1239 *civ = *var_iv;
1240 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1243 /* Returns true if expression EXPR is obviously invariant in LOOP,
1244 i.e. if all its operands are defined outside of the LOOP. */
1246 bool
1247 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1249 basic_block def_bb;
1250 unsigned i, len;
1252 if (is_gimple_min_invariant (expr))
1253 return true;
1255 if (TREE_CODE (expr) == SSA_NAME)
1257 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1258 if (def_bb
1259 && flow_bb_inside_loop_p (loop, def_bb))
1260 return false;
1262 return true;
1265 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1266 return false;
1268 len = TREE_OPERAND_LENGTH (expr);
1269 for (i = 0; i < len; i++)
1270 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1271 return false;
1273 return true;
1276 /* Cumulates the steps of indices into DATA and replaces their values with the
1277 initial ones. Returns false when the value of the index cannot be determined.
1278 Callback for for_each_index. */
1280 struct ifs_ivopts_data
1282 struct ivopts_data *ivopts_data;
1283 tree stmt;
1284 tree step;
1287 static bool
1288 idx_find_step (tree base, tree *idx, void *data)
1290 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1291 struct iv *iv;
1292 tree step, iv_base, iv_step, lbound, off;
1293 struct loop *loop = dta->ivopts_data->current_loop;
1295 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1296 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1297 return false;
1299 /* If base is a component ref, require that the offset of the reference
1300 be invariant. */
1301 if (TREE_CODE (base) == COMPONENT_REF)
1303 off = component_ref_field_offset (base);
1304 return expr_invariant_in_loop_p (loop, off);
1307 /* If base is array, first check whether we will be able to move the
1308 reference out of the loop (in order to take its address in strength
1309 reduction). In order for this to work we need both lower bound
1310 and step to be loop invariants. */
1311 if (TREE_CODE (base) == ARRAY_REF)
1313 step = array_ref_element_size (base);
1314 lbound = array_ref_low_bound (base);
1316 if (!expr_invariant_in_loop_p (loop, step)
1317 || !expr_invariant_in_loop_p (loop, lbound))
1318 return false;
1321 if (TREE_CODE (*idx) != SSA_NAME)
1322 return true;
1324 iv = get_iv (dta->ivopts_data, *idx);
1325 if (!iv)
1326 return false;
1328 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1329 *&x[0], which is not folded and does not trigger the
1330 ARRAY_REF path below. */
1331 *idx = iv->base;
1333 if (integer_zerop (iv->step))
1334 return true;
1336 if (TREE_CODE (base) == ARRAY_REF)
1338 step = array_ref_element_size (base);
1340 /* We only handle addresses whose step is an integer constant. */
1341 if (TREE_CODE (step) != INTEGER_CST)
1342 return false;
1344 else
1345 /* The step for pointer arithmetics already is 1 byte. */
1346 step = build_int_cst (sizetype, 1);
1348 iv_base = iv->base;
1349 iv_step = iv->step;
1350 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1351 sizetype, &iv_base, &iv_step, dta->stmt,
1352 false))
1354 /* The index might wrap. */
1355 return false;
1358 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1359 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1361 return true;
1364 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1365 object is passed to it in DATA. */
1367 static bool
1368 idx_record_use (tree base, tree *idx,
1369 void *vdata)
1371 struct ivopts_data *data = (struct ivopts_data *) vdata;
1372 find_interesting_uses_op (data, *idx);
1373 if (TREE_CODE (base) == ARRAY_REF)
1375 find_interesting_uses_op (data, array_ref_element_size (base));
1376 find_interesting_uses_op (data, array_ref_low_bound (base));
1378 return true;
1381 /* Returns true if memory reference REF may be unaligned. */
1383 static bool
1384 may_be_unaligned_p (tree ref)
1386 tree base;
1387 tree base_type;
1388 HOST_WIDE_INT bitsize;
1389 HOST_WIDE_INT bitpos;
1390 tree toffset;
1391 enum machine_mode mode;
1392 int unsignedp, volatilep;
1393 unsigned base_align;
1395 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1396 thus they are not misaligned. */
1397 if (TREE_CODE (ref) == TARGET_MEM_REF)
1398 return false;
1400 /* The test below is basically copy of what expr.c:normal_inner_ref
1401 does to check whether the object must be loaded by parts when
1402 STRICT_ALIGNMENT is true. */
1403 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1404 &unsignedp, &volatilep, true);
1405 base_type = TREE_TYPE (base);
1406 base_align = TYPE_ALIGN (base_type);
1408 if (mode != BLKmode
1409 && (base_align < GET_MODE_ALIGNMENT (mode)
1410 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1411 || bitpos % BITS_PER_UNIT != 0))
1412 return true;
1414 return false;
1417 /* Return true if EXPR may be non-addressable. */
1419 static bool
1420 may_be_nonaddressable_p (tree expr)
1422 switch (TREE_CODE (expr))
1424 case COMPONENT_REF:
1425 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1426 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1428 case ARRAY_REF:
1429 case ARRAY_RANGE_REF:
1430 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1432 case VIEW_CONVERT_EXPR:
1433 /* This kind of view-conversions may wrap non-addressable objects
1434 and make them look addressable. After some processing the
1435 non-addressability may be uncovered again, causing ADDR_EXPRs
1436 of inappropriate objects to be built. */
1437 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1438 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1440 default:
1441 break;
1444 return false;
1447 /* Finds addresses in *OP_P inside STMT. */
1449 static void
1450 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1452 tree base = *op_p, step = build_int_cst (sizetype, 0);
1453 struct iv *civ;
1454 struct ifs_ivopts_data ifs_ivopts_data;
1456 /* Do not play with volatile memory references. A bit too conservative,
1457 perhaps, but safe. */
1458 if (stmt_ann (stmt)->has_volatile_ops)
1459 goto fail;
1461 /* Ignore bitfields for now. Not really something terribly complicated
1462 to handle. TODO. */
1463 if (TREE_CODE (base) == BIT_FIELD_REF)
1464 goto fail;
1466 if (may_be_nonaddressable_p (base))
1467 goto fail;
1469 if (STRICT_ALIGNMENT
1470 && may_be_unaligned_p (base))
1471 goto fail;
1473 base = unshare_expr (base);
1475 if (TREE_CODE (base) == TARGET_MEM_REF)
1477 tree type = build_pointer_type (TREE_TYPE (base));
1478 tree astep;
1480 if (TMR_BASE (base)
1481 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1483 civ = get_iv (data, TMR_BASE (base));
1484 if (!civ)
1485 goto fail;
1487 TMR_BASE (base) = civ->base;
1488 step = civ->step;
1490 if (TMR_INDEX (base)
1491 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1493 civ = get_iv (data, TMR_INDEX (base));
1494 if (!civ)
1495 goto fail;
1497 TMR_INDEX (base) = civ->base;
1498 astep = civ->step;
1500 if (astep)
1502 if (TMR_STEP (base))
1503 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1505 step = fold_build2 (PLUS_EXPR, type, step, astep);
1509 if (integer_zerop (step))
1510 goto fail;
1511 base = tree_mem_ref_addr (type, base);
1513 else
1515 ifs_ivopts_data.ivopts_data = data;
1516 ifs_ivopts_data.stmt = stmt;
1517 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1518 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1519 || integer_zerop (ifs_ivopts_data.step))
1520 goto fail;
1521 step = ifs_ivopts_data.step;
1523 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1524 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1526 base = build_fold_addr_expr (base);
1528 /* Substituting bases of IVs into the base expression might
1529 have caused folding opportunities. */
1530 if (TREE_CODE (base) == ADDR_EXPR)
1532 tree *ref = &TREE_OPERAND (base, 0);
1533 while (handled_component_p (*ref))
1534 ref = &TREE_OPERAND (*ref, 0);
1535 if (TREE_CODE (*ref) == INDIRECT_REF)
1536 *ref = fold_indirect_ref (*ref);
1540 civ = alloc_iv (base, step);
1541 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1542 return;
1544 fail:
1545 for_each_index (op_p, idx_record_use, data);
1548 /* Finds and records invariants used in STMT. */
1550 static void
1551 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1553 ssa_op_iter iter;
1554 use_operand_p use_p;
1555 tree op;
1557 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1559 op = USE_FROM_PTR (use_p);
1560 record_invariant (data, op, false);
1564 /* Finds interesting uses of induction variables in the statement STMT. */
1566 static void
1567 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1569 struct iv *iv;
1570 tree op, lhs, rhs;
1571 ssa_op_iter iter;
1572 use_operand_p use_p;
1574 find_invariants_stmt (data, stmt);
1576 if (TREE_CODE (stmt) == COND_EXPR)
1578 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1579 return;
1582 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1584 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1585 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1587 if (TREE_CODE (lhs) == SSA_NAME)
1589 /* If the statement defines an induction variable, the uses are not
1590 interesting by themselves. */
1592 iv = get_iv (data, lhs);
1594 if (iv && !integer_zerop (iv->step))
1595 return;
1598 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1600 case tcc_comparison:
1601 find_interesting_uses_cond (data, stmt,
1602 &GIMPLE_STMT_OPERAND (stmt, 1));
1603 return;
1605 case tcc_reference:
1606 find_interesting_uses_address (data, stmt,
1607 &GIMPLE_STMT_OPERAND (stmt, 1));
1608 if (REFERENCE_CLASS_P (lhs))
1609 find_interesting_uses_address (data, stmt,
1610 &GIMPLE_STMT_OPERAND (stmt, 0));
1611 return;
1613 default: ;
1616 if (REFERENCE_CLASS_P (lhs)
1617 && is_gimple_val (rhs))
1619 find_interesting_uses_address (data, stmt,
1620 &GIMPLE_STMT_OPERAND (stmt, 0));
1621 find_interesting_uses_op (data, rhs);
1622 return;
1625 /* TODO -- we should also handle address uses of type
1627 memory = call (whatever);
1631 call (memory). */
1634 if (TREE_CODE (stmt) == PHI_NODE
1635 && bb_for_stmt (stmt) == data->current_loop->header)
1637 lhs = PHI_RESULT (stmt);
1638 iv = get_iv (data, lhs);
1640 if (iv && !integer_zerop (iv->step))
1641 return;
1644 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1646 op = USE_FROM_PTR (use_p);
1648 if (TREE_CODE (op) != SSA_NAME)
1649 continue;
1651 iv = get_iv (data, op);
1652 if (!iv)
1653 continue;
1655 find_interesting_uses_op (data, op);
1659 /* Finds interesting uses of induction variables outside of loops
1660 on loop exit edge EXIT. */
1662 static void
1663 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1665 tree phi, def;
1667 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1669 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1670 if (is_gimple_reg (def))
1671 find_interesting_uses_op (data, def);
1675 /* Finds uses of the induction variables that are interesting. */
1677 static void
1678 find_interesting_uses (struct ivopts_data *data)
1680 basic_block bb;
1681 block_stmt_iterator bsi;
1682 tree phi;
1683 basic_block *body = get_loop_body (data->current_loop);
1684 unsigned i;
1685 struct version_info *info;
1686 edge e;
1688 if (dump_file && (dump_flags & TDF_DETAILS))
1689 fprintf (dump_file, "Uses:\n\n");
1691 for (i = 0; i < data->current_loop->num_nodes; i++)
1693 edge_iterator ei;
1694 bb = body[i];
1696 FOR_EACH_EDGE (e, ei, bb->succs)
1697 if (e->dest != EXIT_BLOCK_PTR
1698 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1699 find_interesting_uses_outside (data, e);
1701 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1702 find_interesting_uses_stmt (data, phi);
1703 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1704 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1707 if (dump_file && (dump_flags & TDF_DETAILS))
1709 bitmap_iterator bi;
1711 fprintf (dump_file, "\n");
1713 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1715 info = ver_info (data, i);
1716 if (info->inv_id)
1718 fprintf (dump_file, " ");
1719 print_generic_expr (dump_file, info->name, TDF_SLIM);
1720 fprintf (dump_file, " is invariant (%d)%s\n",
1721 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1725 fprintf (dump_file, "\n");
1728 free (body);
1731 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1732 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1733 we are at the top-level of the processed address. */
1735 static tree
1736 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1737 unsigned HOST_WIDE_INT *offset)
1739 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1740 enum tree_code code;
1741 tree type, orig_type = TREE_TYPE (expr);
1742 unsigned HOST_WIDE_INT off0, off1, st;
1743 tree orig_expr = expr;
1745 STRIP_NOPS (expr);
1747 type = TREE_TYPE (expr);
1748 code = TREE_CODE (expr);
1749 *offset = 0;
1751 switch (code)
1753 case INTEGER_CST:
1754 if (!cst_and_fits_in_hwi (expr)
1755 || integer_zerop (expr))
1756 return orig_expr;
1758 *offset = int_cst_value (expr);
1759 return build_int_cst (orig_type, 0);
1761 case POINTER_PLUS_EXPR:
1762 case PLUS_EXPR:
1763 case MINUS_EXPR:
1764 op0 = TREE_OPERAND (expr, 0);
1765 op1 = TREE_OPERAND (expr, 1);
1767 op0 = strip_offset_1 (op0, false, false, &off0);
1768 op1 = strip_offset_1 (op1, false, false, &off1);
1770 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1771 if (op0 == TREE_OPERAND (expr, 0)
1772 && op1 == TREE_OPERAND (expr, 1))
1773 return orig_expr;
1775 if (integer_zerop (op1))
1776 expr = op0;
1777 else if (integer_zerop (op0))
1779 if (code == MINUS_EXPR)
1780 expr = fold_build1 (NEGATE_EXPR, type, op1);
1781 else
1782 expr = op1;
1784 else
1785 expr = fold_build2 (code, type, op0, op1);
1787 return fold_convert (orig_type, expr);
1789 case ARRAY_REF:
1790 if (!inside_addr)
1791 return orig_expr;
1793 step = array_ref_element_size (expr);
1794 if (!cst_and_fits_in_hwi (step))
1795 break;
1797 st = int_cst_value (step);
1798 op1 = TREE_OPERAND (expr, 1);
1799 op1 = strip_offset_1 (op1, false, false, &off1);
1800 *offset = off1 * st;
1802 if (top_compref
1803 && integer_zerop (op1))
1805 /* Strip the component reference completely. */
1806 op0 = TREE_OPERAND (expr, 0);
1807 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1808 *offset += off0;
1809 return op0;
1811 break;
1813 case COMPONENT_REF:
1814 if (!inside_addr)
1815 return orig_expr;
1817 tmp = component_ref_field_offset (expr);
1818 if (top_compref
1819 && cst_and_fits_in_hwi (tmp))
1821 /* Strip the component reference completely. */
1822 op0 = TREE_OPERAND (expr, 0);
1823 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1824 *offset = off0 + int_cst_value (tmp);
1825 return op0;
1827 break;
1829 case ADDR_EXPR:
1830 op0 = TREE_OPERAND (expr, 0);
1831 op0 = strip_offset_1 (op0, true, true, &off0);
1832 *offset += off0;
1834 if (op0 == TREE_OPERAND (expr, 0))
1835 return orig_expr;
1837 expr = build_fold_addr_expr (op0);
1838 return fold_convert (orig_type, expr);
1840 case INDIRECT_REF:
1841 inside_addr = false;
1842 break;
1844 default:
1845 return orig_expr;
1848 /* Default handling of expressions for that we want to recurse into
1849 the first operand. */
1850 op0 = TREE_OPERAND (expr, 0);
1851 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1852 *offset += off0;
1854 if (op0 == TREE_OPERAND (expr, 0)
1855 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1856 return orig_expr;
1858 expr = copy_node (expr);
1859 TREE_OPERAND (expr, 0) = op0;
1860 if (op1)
1861 TREE_OPERAND (expr, 1) = op1;
1863 /* Inside address, we might strip the top level component references,
1864 thus changing type of the expression. Handling of ADDR_EXPR
1865 will fix that. */
1866 expr = fold_convert (orig_type, expr);
1868 return expr;
1871 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1873 static tree
1874 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1876 return strip_offset_1 (expr, false, false, offset);
1879 /* Returns variant of TYPE that can be used as base for different uses.
1880 We return unsigned type with the same precision, which avoids problems
1881 with overflows. */
1883 static tree
1884 generic_type_for (tree type)
1886 if (POINTER_TYPE_P (type))
1887 return unsigned_type_for (type);
1889 if (TYPE_UNSIGNED (type))
1890 return type;
1892 return unsigned_type_for (type);
1895 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1896 the bitmap to that we should store it. */
1898 static struct ivopts_data *fd_ivopts_data;
1899 static tree
1900 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1902 bitmap *depends_on = (bitmap *) data;
1903 struct version_info *info;
1905 if (TREE_CODE (*expr_p) != SSA_NAME)
1906 return NULL_TREE;
1907 info = name_info (fd_ivopts_data, *expr_p);
1909 if (!info->inv_id || info->has_nonlin_use)
1910 return NULL_TREE;
1912 if (!*depends_on)
1913 *depends_on = BITMAP_ALLOC (NULL);
1914 bitmap_set_bit (*depends_on, info->inv_id);
1916 return NULL_TREE;
1919 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1920 position to POS. If USE is not NULL, the candidate is set as related to
1921 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1922 replacement of the final value of the iv by a direct computation. */
1924 static struct iv_cand *
1925 add_candidate_1 (struct ivopts_data *data,
1926 tree base, tree step, bool important, enum iv_position pos,
1927 struct iv_use *use, tree incremented_at)
1929 unsigned i;
1930 struct iv_cand *cand = NULL;
1931 tree type, orig_type;
1933 if (base)
1935 orig_type = TREE_TYPE (base);
1936 type = generic_type_for (orig_type);
1937 if (type != orig_type)
1939 base = fold_convert (type, base);
1940 step = fold_convert (type, step);
1944 for (i = 0; i < n_iv_cands (data); i++)
1946 cand = iv_cand (data, i);
1948 if (cand->pos != pos)
1949 continue;
1951 if (cand->incremented_at != incremented_at)
1952 continue;
1954 if (!cand->iv)
1956 if (!base && !step)
1957 break;
1959 continue;
1962 if (!base && !step)
1963 continue;
1965 if (operand_equal_p (base, cand->iv->base, 0)
1966 && operand_equal_p (step, cand->iv->step, 0))
1967 break;
1970 if (i == n_iv_cands (data))
1972 cand = XCNEW (struct iv_cand);
1973 cand->id = i;
1975 if (!base && !step)
1976 cand->iv = NULL;
1977 else
1978 cand->iv = alloc_iv (base, step);
1980 cand->pos = pos;
1981 if (pos != IP_ORIGINAL && cand->iv)
1983 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1984 cand->var_after = cand->var_before;
1986 cand->important = important;
1987 cand->incremented_at = incremented_at;
1988 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1990 if (step
1991 && TREE_CODE (step) != INTEGER_CST)
1993 fd_ivopts_data = data;
1994 walk_tree (&step, find_depends, &cand->depends_on, NULL);
1997 if (dump_file && (dump_flags & TDF_DETAILS))
1998 dump_cand (dump_file, cand);
2001 if (important && !cand->important)
2003 cand->important = true;
2004 if (dump_file && (dump_flags & TDF_DETAILS))
2005 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2008 if (use)
2010 bitmap_set_bit (use->related_cands, i);
2011 if (dump_file && (dump_flags & TDF_DETAILS))
2012 fprintf (dump_file, "Candidate %d is related to use %d\n",
2013 cand->id, use->id);
2016 return cand;
2019 /* Returns true if incrementing the induction variable at the end of the LOOP
2020 is allowed.
2022 The purpose is to avoid splitting latch edge with a biv increment, thus
2023 creating a jump, possibly confusing other optimization passes and leaving
2024 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2025 is not available (so we do not have a better alternative), or if the latch
2026 edge is already nonempty. */
2028 static bool
2029 allow_ip_end_pos_p (struct loop *loop)
2031 if (!ip_normal_pos (loop))
2032 return true;
2034 if (!empty_block_p (ip_end_pos (loop)))
2035 return true;
2037 return false;
2040 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2041 position to POS. If USE is not NULL, the candidate is set as related to
2042 it. The candidate computation is scheduled on all available positions. */
2044 static void
2045 add_candidate (struct ivopts_data *data,
2046 tree base, tree step, bool important, struct iv_use *use)
2048 if (ip_normal_pos (data->current_loop))
2049 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2050 if (ip_end_pos (data->current_loop)
2051 && allow_ip_end_pos_p (data->current_loop))
2052 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2055 /* Add a standard "0 + 1 * iteration" iv candidate for a
2056 type with SIZE bits. */
2058 static void
2059 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2060 unsigned int size)
2062 tree type = lang_hooks.types.type_for_size (size, true);
2063 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2064 true, NULL);
2067 /* Adds standard iv candidates. */
2069 static void
2070 add_standard_iv_candidates (struct ivopts_data *data)
2072 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2074 /* The same for a double-integer type if it is still fast enough. */
2075 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2076 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2080 /* Adds candidates bases on the old induction variable IV. */
2082 static void
2083 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2085 tree phi, def;
2086 struct iv_cand *cand;
2088 add_candidate (data, iv->base, iv->step, true, NULL);
2090 /* The same, but with initial value zero. */
2091 add_candidate (data,
2092 build_int_cst (TREE_TYPE (iv->base), 0),
2093 iv->step, true, NULL);
2095 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2096 if (TREE_CODE (phi) == PHI_NODE)
2098 /* Additionally record the possibility of leaving the original iv
2099 untouched. */
2100 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2101 cand = add_candidate_1 (data,
2102 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2103 SSA_NAME_DEF_STMT (def));
2104 cand->var_before = iv->ssa_name;
2105 cand->var_after = def;
2109 /* Adds candidates based on the old induction variables. */
2111 static void
2112 add_old_ivs_candidates (struct ivopts_data *data)
2114 unsigned i;
2115 struct iv *iv;
2116 bitmap_iterator bi;
2118 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2120 iv = ver_info (data, i)->iv;
2121 if (iv && iv->biv_p && !integer_zerop (iv->step))
2122 add_old_iv_candidates (data, iv);
2126 /* Adds candidates based on the value of the induction variable IV and USE. */
2128 static void
2129 add_iv_value_candidates (struct ivopts_data *data,
2130 struct iv *iv, struct iv_use *use)
2132 unsigned HOST_WIDE_INT offset;
2133 tree base;
2135 add_candidate (data, iv->base, iv->step, false, use);
2137 /* The same, but with initial value zero. Make such variable important,
2138 since it is generic enough so that possibly many uses may be based
2139 on it. */
2140 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2141 iv->step, true, use);
2143 /* Third, try removing the constant offset. */
2144 base = strip_offset (iv->base, &offset);
2145 if (offset)
2146 add_candidate (data, base, iv->step, false, use);
2149 /* Adds candidates based on the uses. */
2151 static void
2152 add_derived_ivs_candidates (struct ivopts_data *data)
2154 unsigned i;
2156 for (i = 0; i < n_iv_uses (data); i++)
2158 struct iv_use *use = iv_use (data, i);
2160 if (!use)
2161 continue;
2163 switch (use->type)
2165 case USE_NONLINEAR_EXPR:
2166 case USE_COMPARE:
2167 case USE_ADDRESS:
2168 /* Just add the ivs based on the value of the iv used here. */
2169 add_iv_value_candidates (data, use->iv, use);
2170 break;
2172 default:
2173 gcc_unreachable ();
2178 /* Record important candidates and add them to related_cands bitmaps
2179 if needed. */
2181 static void
2182 record_important_candidates (struct ivopts_data *data)
2184 unsigned i;
2185 struct iv_use *use;
2187 for (i = 0; i < n_iv_cands (data); i++)
2189 struct iv_cand *cand = iv_cand (data, i);
2191 if (cand->important)
2192 bitmap_set_bit (data->important_candidates, i);
2195 data->consider_all_candidates = (n_iv_cands (data)
2196 <= CONSIDER_ALL_CANDIDATES_BOUND);
2198 if (data->consider_all_candidates)
2200 /* We will not need "related_cands" bitmaps in this case,
2201 so release them to decrease peak memory consumption. */
2202 for (i = 0; i < n_iv_uses (data); i++)
2204 use = iv_use (data, i);
2205 BITMAP_FREE (use->related_cands);
2208 else
2210 /* Add important candidates to the related_cands bitmaps. */
2211 for (i = 0; i < n_iv_uses (data); i++)
2212 bitmap_ior_into (iv_use (data, i)->related_cands,
2213 data->important_candidates);
2217 /* Finds the candidates for the induction variables. */
2219 static void
2220 find_iv_candidates (struct ivopts_data *data)
2222 /* Add commonly used ivs. */
2223 add_standard_iv_candidates (data);
2225 /* Add old induction variables. */
2226 add_old_ivs_candidates (data);
2228 /* Add induction variables derived from uses. */
2229 add_derived_ivs_candidates (data);
2231 /* Record the important candidates. */
2232 record_important_candidates (data);
2235 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2236 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2237 we allocate a simple list to every use. */
2239 static void
2240 alloc_use_cost_map (struct ivopts_data *data)
2242 unsigned i, size, s, j;
2244 for (i = 0; i < n_iv_uses (data); i++)
2246 struct iv_use *use = iv_use (data, i);
2247 bitmap_iterator bi;
2249 if (data->consider_all_candidates)
2250 size = n_iv_cands (data);
2251 else
2253 s = 0;
2254 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2256 s++;
2259 /* Round up to the power of two, so that moduling by it is fast. */
2260 for (size = 1; size < s; size <<= 1)
2261 continue;
2264 use->n_map_members = size;
2265 use->cost_map = XCNEWVEC (struct cost_pair, size);
2269 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2270 on invariants DEPENDS_ON and that the value used in expressing it
2271 is VALUE.*/
2273 static void
2274 set_use_iv_cost (struct ivopts_data *data,
2275 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2276 bitmap depends_on, tree value)
2278 unsigned i, s;
2280 if (cost == INFTY)
2282 BITMAP_FREE (depends_on);
2283 return;
2286 if (data->consider_all_candidates)
2288 use->cost_map[cand->id].cand = cand;
2289 use->cost_map[cand->id].cost = cost;
2290 use->cost_map[cand->id].depends_on = depends_on;
2291 use->cost_map[cand->id].value = value;
2292 return;
2295 /* n_map_members is a power of two, so this computes modulo. */
2296 s = cand->id & (use->n_map_members - 1);
2297 for (i = s; i < use->n_map_members; i++)
2298 if (!use->cost_map[i].cand)
2299 goto found;
2300 for (i = 0; i < s; i++)
2301 if (!use->cost_map[i].cand)
2302 goto found;
2304 gcc_unreachable ();
2306 found:
2307 use->cost_map[i].cand = cand;
2308 use->cost_map[i].cost = cost;
2309 use->cost_map[i].depends_on = depends_on;
2310 use->cost_map[i].value = value;
2313 /* Gets cost of (USE, CANDIDATE) pair. */
2315 static struct cost_pair *
2316 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2317 struct iv_cand *cand)
2319 unsigned i, s;
2320 struct cost_pair *ret;
2322 if (!cand)
2323 return NULL;
2325 if (data->consider_all_candidates)
2327 ret = use->cost_map + cand->id;
2328 if (!ret->cand)
2329 return NULL;
2331 return ret;
2334 /* n_map_members is a power of two, so this computes modulo. */
2335 s = cand->id & (use->n_map_members - 1);
2336 for (i = s; i < use->n_map_members; i++)
2337 if (use->cost_map[i].cand == cand)
2338 return use->cost_map + i;
2340 for (i = 0; i < s; i++)
2341 if (use->cost_map[i].cand == cand)
2342 return use->cost_map + i;
2344 return NULL;
2347 /* Returns estimate on cost of computing SEQ. */
2349 static unsigned
2350 seq_cost (rtx seq)
2352 unsigned cost = 0;
2353 rtx set;
2355 for (; seq; seq = NEXT_INSN (seq))
2357 set = single_set (seq);
2358 if (set)
2359 cost += rtx_cost (set, SET);
2360 else
2361 cost++;
2364 return cost;
2367 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2368 static rtx
2369 produce_memory_decl_rtl (tree obj, int *regno)
2371 rtx x;
2373 gcc_assert (obj);
2374 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2376 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2377 x = gen_rtx_SYMBOL_REF (Pmode, name);
2378 SET_SYMBOL_REF_DECL (x, obj);
2379 x = gen_rtx_MEM (DECL_MODE (obj), x);
2380 targetm.encode_section_info (obj, x, true);
2382 else
2384 x = gen_raw_REG (Pmode, (*regno)++);
2385 x = gen_rtx_MEM (DECL_MODE (obj), x);
2388 return x;
2391 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2392 walk_tree. DATA contains the actual fake register number. */
2394 static tree
2395 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2397 tree obj = NULL_TREE;
2398 rtx x = NULL_RTX;
2399 int *regno = (int *) data;
2401 switch (TREE_CODE (*expr_p))
2403 case ADDR_EXPR:
2404 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2405 handled_component_p (*expr_p);
2406 expr_p = &TREE_OPERAND (*expr_p, 0))
2407 continue;
2408 obj = *expr_p;
2409 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2410 x = produce_memory_decl_rtl (obj, regno);
2411 break;
2413 case SSA_NAME:
2414 *ws = 0;
2415 obj = SSA_NAME_VAR (*expr_p);
2416 if (!DECL_RTL_SET_P (obj))
2417 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2418 break;
2420 case VAR_DECL:
2421 case PARM_DECL:
2422 case RESULT_DECL:
2423 *ws = 0;
2424 obj = *expr_p;
2426 if (DECL_RTL_SET_P (obj))
2427 break;
2429 if (DECL_MODE (obj) == BLKmode)
2430 x = produce_memory_decl_rtl (obj, regno);
2431 else
2432 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2434 break;
2436 default:
2437 break;
2440 if (x)
2442 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2443 SET_DECL_RTL (obj, x);
2446 return NULL_TREE;
2449 /* Determines cost of the computation of EXPR. */
2451 static unsigned
2452 computation_cost (tree expr)
2454 rtx seq, rslt;
2455 tree type = TREE_TYPE (expr);
2456 unsigned cost;
2457 /* Avoid using hard regs in ways which may be unsupported. */
2458 int regno = LAST_VIRTUAL_REGISTER + 1;
2460 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2461 start_sequence ();
2462 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2463 seq = get_insns ();
2464 end_sequence ();
2466 cost = seq_cost (seq);
2467 if (MEM_P (rslt))
2468 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2470 return cost;
2473 /* Returns variable containing the value of candidate CAND at statement AT. */
2475 static tree
2476 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2478 if (stmt_after_increment (loop, cand, stmt))
2479 return cand->var_after;
2480 else
2481 return cand->var_before;
2484 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2485 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2488 tree_int_cst_sign_bit (const_tree t)
2490 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2491 unsigned HOST_WIDE_INT w;
2493 if (bitno < HOST_BITS_PER_WIDE_INT)
2494 w = TREE_INT_CST_LOW (t);
2495 else
2497 w = TREE_INT_CST_HIGH (t);
2498 bitno -= HOST_BITS_PER_WIDE_INT;
2501 return (w >> bitno) & 1;
2504 /* If we can prove that TOP = cst * BOT for some constant cst,
2505 store cst to MUL and return true. Otherwise return false.
2506 The returned value is always sign-extended, regardless of the
2507 signedness of TOP and BOT. */
2509 static bool
2510 constant_multiple_of (tree top, tree bot, double_int *mul)
2512 tree mby;
2513 enum tree_code code;
2514 double_int res, p0, p1;
2515 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2517 STRIP_NOPS (top);
2518 STRIP_NOPS (bot);
2520 if (operand_equal_p (top, bot, 0))
2522 *mul = double_int_one;
2523 return true;
2526 code = TREE_CODE (top);
2527 switch (code)
2529 case MULT_EXPR:
2530 mby = TREE_OPERAND (top, 1);
2531 if (TREE_CODE (mby) != INTEGER_CST)
2532 return false;
2534 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2535 return false;
2537 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2538 precision);
2539 return true;
2541 case PLUS_EXPR:
2542 case MINUS_EXPR:
2543 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2544 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2545 return false;
2547 if (code == MINUS_EXPR)
2548 p1 = double_int_neg (p1);
2549 *mul = double_int_sext (double_int_add (p0, p1), precision);
2550 return true;
2552 case INTEGER_CST:
2553 if (TREE_CODE (bot) != INTEGER_CST)
2554 return false;
2556 p0 = double_int_sext (tree_to_double_int (top), precision);
2557 p1 = double_int_sext (tree_to_double_int (bot), precision);
2558 if (double_int_zero_p (p1))
2559 return false;
2560 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2561 precision);
2562 return double_int_zero_p (res);
2564 default:
2565 return false;
2569 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2570 same precision that is at least as wide as the precision of TYPE, stores
2571 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2572 type of A and B. */
2574 static tree
2575 determine_common_wider_type (tree *a, tree *b)
2577 tree wider_type = NULL;
2578 tree suba, subb;
2579 tree atype = TREE_TYPE (*a);
2581 if ((TREE_CODE (*a) == NOP_EXPR
2582 || TREE_CODE (*a) == CONVERT_EXPR))
2584 suba = TREE_OPERAND (*a, 0);
2585 wider_type = TREE_TYPE (suba);
2586 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2587 return atype;
2589 else
2590 return atype;
2592 if ((TREE_CODE (*b) == NOP_EXPR
2593 || TREE_CODE (*b) == CONVERT_EXPR))
2595 subb = TREE_OPERAND (*b, 0);
2596 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2597 return atype;
2599 else
2600 return atype;
2602 *a = suba;
2603 *b = subb;
2604 return wider_type;
2607 /* Determines the expression by that USE is expressed from induction variable
2608 CAND at statement AT in LOOP. The expression is stored in a decomposed
2609 form into AFF. Returns false if USE cannot be expressed using CAND. */
2611 static bool
2612 get_computation_aff (struct loop *loop,
2613 struct iv_use *use, struct iv_cand *cand, tree at,
2614 struct affine_tree_combination *aff)
2616 tree ubase = use->iv->base;
2617 tree ustep = use->iv->step;
2618 tree cbase = cand->iv->base;
2619 tree cstep = cand->iv->step, cstep_common;
2620 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2621 tree common_type, var;
2622 tree uutype;
2623 aff_tree cbase_aff, var_aff;
2624 double_int rat;
2626 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2628 /* We do not have a precision to express the values of use. */
2629 return false;
2632 var = var_at_stmt (loop, cand, at);
2633 uutype = unsigned_type_for (utype);
2635 /* If the conversion is not noop, perform it. */
2636 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2638 cstep = fold_convert (uutype, cstep);
2639 cbase = fold_convert (uutype, cbase);
2640 var = fold_convert (uutype, var);
2643 if (!constant_multiple_of (ustep, cstep, &rat))
2644 return false;
2646 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2647 type, we achieve better folding by computing their difference in this
2648 wider type, and cast the result to UUTYPE. We do not need to worry about
2649 overflows, as all the arithmetics will in the end be performed in UUTYPE
2650 anyway. */
2651 common_type = determine_common_wider_type (&ubase, &cbase);
2653 /* use = ubase - ratio * cbase + ratio * var. */
2654 tree_to_aff_combination (ubase, common_type, aff);
2655 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2656 tree_to_aff_combination (var, uutype, &var_aff);
2658 /* We need to shift the value if we are after the increment. */
2659 if (stmt_after_increment (loop, cand, at))
2661 aff_tree cstep_aff;
2663 if (common_type != uutype)
2664 cstep_common = fold_convert (common_type, cstep);
2665 else
2666 cstep_common = cstep;
2668 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2669 aff_combination_add (&cbase_aff, &cstep_aff);
2672 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2673 aff_combination_add (aff, &cbase_aff);
2674 if (common_type != uutype)
2675 aff_combination_convert (aff, uutype);
2677 aff_combination_scale (&var_aff, rat);
2678 aff_combination_add (aff, &var_aff);
2680 return true;
2683 /* Determines the expression by that USE is expressed from induction variable
2684 CAND at statement AT in LOOP. The computation is unshared. */
2686 static tree
2687 get_computation_at (struct loop *loop,
2688 struct iv_use *use, struct iv_cand *cand, tree at)
2690 aff_tree aff;
2691 tree type = TREE_TYPE (use->iv->base);
2693 if (!get_computation_aff (loop, use, cand, at, &aff))
2694 return NULL_TREE;
2695 unshare_aff_combination (&aff);
2696 return fold_convert (type, aff_combination_to_tree (&aff));
2699 /* Determines the expression by that USE is expressed from induction variable
2700 CAND in LOOP. The computation is unshared. */
2702 static tree
2703 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2705 return get_computation_at (loop, use, cand, use->stmt);
2708 /* Returns cost of addition in MODE. */
2710 static unsigned
2711 add_cost (enum machine_mode mode)
2713 static unsigned costs[NUM_MACHINE_MODES];
2714 rtx seq;
2715 unsigned cost;
2717 if (costs[mode])
2718 return costs[mode];
2720 start_sequence ();
2721 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2722 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2723 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2724 NULL_RTX);
2725 seq = get_insns ();
2726 end_sequence ();
2728 cost = seq_cost (seq);
2729 if (!cost)
2730 cost = 1;
2732 costs[mode] = cost;
2734 if (dump_file && (dump_flags & TDF_DETAILS))
2735 fprintf (dump_file, "Addition in %s costs %d\n",
2736 GET_MODE_NAME (mode), cost);
2737 return cost;
2740 /* Entry in a hashtable of already known costs for multiplication. */
2741 struct mbc_entry
2743 HOST_WIDE_INT cst; /* The constant to multiply by. */
2744 enum machine_mode mode; /* In mode. */
2745 unsigned cost; /* The cost. */
2748 /* Counts hash value for the ENTRY. */
2750 static hashval_t
2751 mbc_entry_hash (const void *entry)
2753 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2755 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2758 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2760 static int
2761 mbc_entry_eq (const void *entry1, const void *entry2)
2763 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2764 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2766 return (e1->mode == e2->mode
2767 && e1->cst == e2->cst);
2770 /* Returns cost of multiplication by constant CST in MODE. */
2772 unsigned
2773 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2775 static htab_t costs;
2776 struct mbc_entry **cached, act;
2777 rtx seq;
2778 unsigned cost;
2780 if (!costs)
2781 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2783 act.mode = mode;
2784 act.cst = cst;
2785 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2786 if (*cached)
2787 return (*cached)->cost;
2789 *cached = XNEW (struct mbc_entry);
2790 (*cached)->mode = mode;
2791 (*cached)->cst = cst;
2793 start_sequence ();
2794 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2795 gen_int_mode (cst, mode), NULL_RTX, 0);
2796 seq = get_insns ();
2797 end_sequence ();
2799 cost = seq_cost (seq);
2801 if (dump_file && (dump_flags & TDF_DETAILS))
2802 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2803 (int) cst, GET_MODE_NAME (mode), cost);
2805 (*cached)->cost = cost;
2807 return cost;
2810 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2811 validity for a memory reference accessing memory of mode MODE. */
2813 bool
2814 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2816 #define MAX_RATIO 128
2817 static sbitmap valid_mult[MAX_MACHINE_MODE];
2819 if (!valid_mult[mode])
2821 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2822 rtx addr;
2823 HOST_WIDE_INT i;
2825 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2826 sbitmap_zero (valid_mult[mode]);
2827 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2828 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2830 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2831 if (memory_address_p (mode, addr))
2832 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2835 if (dump_file && (dump_flags & TDF_DETAILS))
2837 fprintf (dump_file, " allowed multipliers:");
2838 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2839 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2840 fprintf (dump_file, " %d", (int) i);
2841 fprintf (dump_file, "\n");
2842 fprintf (dump_file, "\n");
2846 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2847 return false;
2849 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2852 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2853 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2854 variable is omitted. Compute the cost for a memory reference that accesses
2855 a memory location of mode MEM_MODE.
2857 TODO -- there must be some better way. This all is quite crude. */
2859 static unsigned
2860 get_address_cost (bool symbol_present, bool var_present,
2861 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2862 enum machine_mode mem_mode)
2864 static bool initialized[MAX_MACHINE_MODE];
2865 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2866 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2867 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2868 unsigned cost, acost;
2869 bool offset_p, ratio_p;
2870 HOST_WIDE_INT s_offset;
2871 unsigned HOST_WIDE_INT mask;
2872 unsigned bits;
2874 if (!initialized[mem_mode])
2876 HOST_WIDE_INT i;
2877 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2878 int old_cse_not_expected;
2879 unsigned sym_p, var_p, off_p, rat_p, add_c;
2880 rtx seq, addr, base;
2881 rtx reg0, reg1;
2883 initialized[mem_mode] = true;
2885 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2887 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2888 for (i = start; i <= 1 << 20; i <<= 1)
2890 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2891 if (!memory_address_p (mem_mode, addr))
2892 break;
2894 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2895 off[mem_mode] = max_offset[mem_mode];
2897 for (i = start; i <= 1 << 20; i <<= 1)
2899 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2900 if (!memory_address_p (mem_mode, addr))
2901 break;
2903 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2905 if (dump_file && (dump_flags & TDF_DETAILS))
2907 fprintf (dump_file, "get_address_cost:\n");
2908 fprintf (dump_file, " min offset %s %d\n",
2909 GET_MODE_NAME (mem_mode),
2910 (int) min_offset[mem_mode]);
2911 fprintf (dump_file, " max offset %s %d\n",
2912 GET_MODE_NAME (mem_mode),
2913 (int) max_offset[mem_mode]);
2916 rat[mem_mode] = 1;
2917 for (i = 2; i <= MAX_RATIO; i++)
2918 if (multiplier_allowed_in_address_p (i, mem_mode))
2920 rat[mem_mode] = i;
2921 break;
2924 /* Compute the cost of various addressing modes. */
2925 acost = 0;
2926 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2927 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2929 for (i = 0; i < 16; i++)
2931 sym_p = i & 1;
2932 var_p = (i >> 1) & 1;
2933 off_p = (i >> 2) & 1;
2934 rat_p = (i >> 3) & 1;
2936 addr = reg0;
2937 if (rat_p)
2938 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2939 gen_int_mode (rat[mem_mode], Pmode));
2941 if (var_p)
2942 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2944 if (sym_p)
2946 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2947 /* ??? We can run into trouble with some backends by presenting
2948 it with symbols which havn't been properly passed through
2949 targetm.encode_section_info. By setting the local bit, we
2950 enhance the probability of things working. */
2951 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2953 if (off_p)
2954 base = gen_rtx_fmt_e (CONST, Pmode,
2955 gen_rtx_fmt_ee (PLUS, Pmode,
2956 base,
2957 gen_int_mode (off[mem_mode],
2958 Pmode)));
2960 else if (off_p)
2961 base = gen_int_mode (off[mem_mode], Pmode);
2962 else
2963 base = NULL_RTX;
2965 if (base)
2966 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2968 start_sequence ();
2969 /* To avoid splitting addressing modes, pretend that no cse will
2970 follow. */
2971 old_cse_not_expected = cse_not_expected;
2972 cse_not_expected = true;
2973 addr = memory_address (mem_mode, addr);
2974 cse_not_expected = old_cse_not_expected;
2975 seq = get_insns ();
2976 end_sequence ();
2978 acost = seq_cost (seq);
2979 acost += address_cost (addr, mem_mode);
2981 if (!acost)
2982 acost = 1;
2983 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2986 /* On some targets, it is quite expensive to load symbol to a register,
2987 which makes addresses that contain symbols look much more expensive.
2988 However, the symbol will have to be loaded in any case before the
2989 loop (and quite likely we have it in register already), so it does not
2990 make much sense to penalize them too heavily. So make some final
2991 tweaks for the SYMBOL_PRESENT modes:
2993 If VAR_PRESENT is false, and the mode obtained by changing symbol to
2994 var is cheaper, use this mode with small penalty.
2995 If VAR_PRESENT is true, try whether the mode with
2996 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
2997 if this is the case, use it. */
2998 add_c = add_cost (Pmode);
2999 for (i = 0; i < 8; i++)
3001 var_p = i & 1;
3002 off_p = (i >> 1) & 1;
3003 rat_p = (i >> 2) & 1;
3005 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3006 if (var_p)
3007 acost += add_c;
3009 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3010 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3013 if (dump_file && (dump_flags & TDF_DETAILS))
3015 fprintf (dump_file, "Address costs:\n");
3017 for (i = 0; i < 16; i++)
3019 sym_p = i & 1;
3020 var_p = (i >> 1) & 1;
3021 off_p = (i >> 2) & 1;
3022 rat_p = (i >> 3) & 1;
3024 fprintf (dump_file, " ");
3025 if (sym_p)
3026 fprintf (dump_file, "sym + ");
3027 if (var_p)
3028 fprintf (dump_file, "var + ");
3029 if (off_p)
3030 fprintf (dump_file, "cst + ");
3031 if (rat_p)
3032 fprintf (dump_file, "rat * ");
3034 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3035 fprintf (dump_file, "index costs %d\n", acost);
3037 fprintf (dump_file, "\n");
3041 bits = GET_MODE_BITSIZE (Pmode);
3042 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3043 offset &= mask;
3044 if ((offset >> (bits - 1) & 1))
3045 offset |= ~mask;
3046 s_offset = offset;
3048 cost = 0;
3049 offset_p = (s_offset != 0
3050 && min_offset[mem_mode] <= s_offset
3051 && s_offset <= max_offset[mem_mode]);
3052 ratio_p = (ratio != 1
3053 && multiplier_allowed_in_address_p (ratio, mem_mode));
3055 if (ratio != 1 && !ratio_p)
3056 cost += multiply_by_cost (ratio, Pmode);
3058 if (s_offset && !offset_p && !symbol_present)
3059 cost += add_cost (Pmode);
3061 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3062 return cost + acost;
3065 /* Estimates cost of forcing expression EXPR into a variable. */
3067 unsigned
3068 force_expr_to_var_cost (tree expr)
3070 static bool costs_initialized = false;
3071 static unsigned integer_cost;
3072 static unsigned symbol_cost;
3073 static unsigned address_cost;
3074 tree op0, op1;
3075 unsigned cost0, cost1, cost;
3076 enum machine_mode mode;
3078 if (!costs_initialized)
3080 tree type = build_pointer_type (integer_type_node);
3081 tree var, addr;
3082 rtx x;
3084 var = create_tmp_var_raw (integer_type_node, "test_var");
3085 TREE_STATIC (var) = 1;
3086 x = produce_memory_decl_rtl (var, NULL);
3087 SET_DECL_RTL (var, x);
3089 integer_cost = computation_cost (build_int_cst (integer_type_node,
3090 2000));
3092 addr = build1 (ADDR_EXPR, type, var);
3093 symbol_cost = computation_cost (addr) + 1;
3095 address_cost
3096 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3097 addr,
3098 build_int_cst (sizetype, 2000))) + 1;
3099 if (dump_file && (dump_flags & TDF_DETAILS))
3101 fprintf (dump_file, "force_expr_to_var_cost:\n");
3102 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3103 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3104 fprintf (dump_file, " address %d\n", (int) address_cost);
3105 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3106 fprintf (dump_file, "\n");
3109 costs_initialized = true;
3112 STRIP_NOPS (expr);
3114 if (SSA_VAR_P (expr))
3115 return 0;
3117 if (TREE_INVARIANT (expr))
3119 if (TREE_CODE (expr) == INTEGER_CST)
3120 return integer_cost;
3122 if (TREE_CODE (expr) == ADDR_EXPR)
3124 tree obj = TREE_OPERAND (expr, 0);
3126 if (TREE_CODE (obj) == VAR_DECL
3127 || TREE_CODE (obj) == PARM_DECL
3128 || TREE_CODE (obj) == RESULT_DECL)
3129 return symbol_cost;
3132 return address_cost;
3135 switch (TREE_CODE (expr))
3137 case POINTER_PLUS_EXPR:
3138 case PLUS_EXPR:
3139 case MINUS_EXPR:
3140 case MULT_EXPR:
3141 op0 = TREE_OPERAND (expr, 0);
3142 op1 = TREE_OPERAND (expr, 1);
3143 STRIP_NOPS (op0);
3144 STRIP_NOPS (op1);
3146 if (is_gimple_val (op0))
3147 cost0 = 0;
3148 else
3149 cost0 = force_expr_to_var_cost (op0);
3151 if (is_gimple_val (op1))
3152 cost1 = 0;
3153 else
3154 cost1 = force_expr_to_var_cost (op1);
3156 break;
3158 default:
3159 /* Just an arbitrary value, FIXME. */
3160 return target_spill_cost;
3163 mode = TYPE_MODE (TREE_TYPE (expr));
3164 switch (TREE_CODE (expr))
3166 case POINTER_PLUS_EXPR:
3167 case PLUS_EXPR:
3168 case MINUS_EXPR:
3169 cost = add_cost (mode);
3170 break;
3172 case MULT_EXPR:
3173 if (cst_and_fits_in_hwi (op0))
3174 cost = multiply_by_cost (int_cst_value (op0), mode);
3175 else if (cst_and_fits_in_hwi (op1))
3176 cost = multiply_by_cost (int_cst_value (op1), mode);
3177 else
3178 return target_spill_cost;
3179 break;
3181 default:
3182 gcc_unreachable ();
3185 cost += cost0;
3186 cost += cost1;
3188 /* Bound the cost by target_spill_cost. The parts of complicated
3189 computations often are either loop invariant or at least can
3190 be shared between several iv uses, so letting this grow without
3191 limits would not give reasonable results. */
3192 return cost < target_spill_cost ? cost : target_spill_cost;
3195 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3196 invariants the computation depends on. */
3198 static unsigned
3199 force_var_cost (struct ivopts_data *data,
3200 tree expr, bitmap *depends_on)
3202 if (depends_on)
3204 fd_ivopts_data = data;
3205 walk_tree (&expr, find_depends, depends_on, NULL);
3208 return force_expr_to_var_cost (expr);
3211 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3212 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3213 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3214 invariants the computation depends on. */
3216 static unsigned
3217 split_address_cost (struct ivopts_data *data,
3218 tree addr, bool *symbol_present, bool *var_present,
3219 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3221 tree core;
3222 HOST_WIDE_INT bitsize;
3223 HOST_WIDE_INT bitpos;
3224 tree toffset;
3225 enum machine_mode mode;
3226 int unsignedp, volatilep;
3228 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3229 &unsignedp, &volatilep, false);
3231 if (toffset != 0
3232 || bitpos % BITS_PER_UNIT != 0
3233 || TREE_CODE (core) != VAR_DECL)
3235 *symbol_present = false;
3236 *var_present = true;
3237 fd_ivopts_data = data;
3238 walk_tree (&addr, find_depends, depends_on, NULL);
3239 return target_spill_cost;
3242 *offset += bitpos / BITS_PER_UNIT;
3243 if (TREE_STATIC (core)
3244 || DECL_EXTERNAL (core))
3246 *symbol_present = true;
3247 *var_present = false;
3248 return 0;
3251 *symbol_present = false;
3252 *var_present = true;
3253 return 0;
3256 /* Estimates cost of expressing difference of addresses E1 - E2 as
3257 var + symbol + offset. The value of offset is added to OFFSET,
3258 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3259 part is missing. DEPENDS_ON is a set of the invariants the computation
3260 depends on. */
3262 static unsigned
3263 ptr_difference_cost (struct ivopts_data *data,
3264 tree e1, tree e2, bool *symbol_present, bool *var_present,
3265 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3267 HOST_WIDE_INT diff = 0;
3268 unsigned cost;
3270 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3272 if (ptr_difference_const (e1, e2, &diff))
3274 *offset += diff;
3275 *symbol_present = false;
3276 *var_present = false;
3277 return 0;
3280 if (e2 == integer_zero_node)
3281 return split_address_cost (data, TREE_OPERAND (e1, 0),
3282 symbol_present, var_present, offset, depends_on);
3284 *symbol_present = false;
3285 *var_present = true;
3287 cost = force_var_cost (data, e1, depends_on);
3288 cost += force_var_cost (data, e2, depends_on);
3289 cost += add_cost (Pmode);
3291 return cost;
3294 /* Estimates cost of expressing difference E1 - E2 as
3295 var + symbol + offset. The value of offset is added to OFFSET,
3296 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3297 part is missing. DEPENDS_ON is a set of the invariants the computation
3298 depends on. */
3300 static unsigned
3301 difference_cost (struct ivopts_data *data,
3302 tree e1, tree e2, bool *symbol_present, bool *var_present,
3303 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3305 unsigned cost;
3306 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3307 unsigned HOST_WIDE_INT off1, off2;
3309 e1 = strip_offset (e1, &off1);
3310 e2 = strip_offset (e2, &off2);
3311 *offset += off1 - off2;
3313 STRIP_NOPS (e1);
3314 STRIP_NOPS (e2);
3316 if (TREE_CODE (e1) == ADDR_EXPR)
3317 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3318 depends_on);
3319 *symbol_present = false;
3321 if (operand_equal_p (e1, e2, 0))
3323 *var_present = false;
3324 return 0;
3326 *var_present = true;
3327 if (integer_zerop (e2))
3328 return force_var_cost (data, e1, depends_on);
3330 if (integer_zerop (e1))
3332 cost = force_var_cost (data, e2, depends_on);
3333 cost += multiply_by_cost (-1, mode);
3335 return cost;
3338 cost = force_var_cost (data, e1, depends_on);
3339 cost += force_var_cost (data, e2, depends_on);
3340 cost += add_cost (mode);
3342 return cost;
3345 /* Determines the cost of the computation by that USE is expressed
3346 from induction variable CAND. If ADDRESS_P is true, we just need
3347 to create an address from it, otherwise we want to get it into
3348 register. A set of invariants we depend on is stored in
3349 DEPENDS_ON. AT is the statement at that the value is computed. */
3351 static unsigned
3352 get_computation_cost_at (struct ivopts_data *data,
3353 struct iv_use *use, struct iv_cand *cand,
3354 bool address_p, bitmap *depends_on, tree at)
3356 tree ubase = use->iv->base, ustep = use->iv->step;
3357 tree cbase, cstep;
3358 tree utype = TREE_TYPE (ubase), ctype;
3359 unsigned HOST_WIDE_INT cstepi, offset = 0;
3360 HOST_WIDE_INT ratio, aratio;
3361 bool var_present, symbol_present;
3362 unsigned cost = 0, n_sums;
3363 double_int rat;
3365 *depends_on = NULL;
3367 /* Only consider real candidates. */
3368 if (!cand->iv)
3369 return INFTY;
3371 cbase = cand->iv->base;
3372 cstep = cand->iv->step;
3373 ctype = TREE_TYPE (cbase);
3375 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3377 /* We do not have a precision to express the values of use. */
3378 return INFTY;
3381 if (address_p)
3383 /* Do not try to express address of an object with computation based
3384 on address of a different object. This may cause problems in rtl
3385 level alias analysis (that does not expect this to be happening,
3386 as this is illegal in C), and would be unlikely to be useful
3387 anyway. */
3388 if (use->iv->base_object
3389 && cand->iv->base_object
3390 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3391 return INFTY;
3394 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3396 /* TODO -- add direct handling of this case. */
3397 goto fallback;
3400 /* CSTEPI is removed from the offset in case statement is after the
3401 increment. If the step is not constant, we use zero instead.
3402 This is a bit imprecise (there is the extra addition), but
3403 redundancy elimination is likely to transform the code so that
3404 it uses value of the variable before increment anyway,
3405 so it is not that much unrealistic. */
3406 if (cst_and_fits_in_hwi (cstep))
3407 cstepi = int_cst_value (cstep);
3408 else
3409 cstepi = 0;
3411 if (!constant_multiple_of (ustep, cstep, &rat))
3412 return INFTY;
3414 if (double_int_fits_in_shwi_p (rat))
3415 ratio = double_int_to_shwi (rat);
3416 else
3417 return INFTY;
3419 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3420 or ratio == 1, it is better to handle this like
3422 ubase - ratio * cbase + ratio * var
3424 (also holds in the case ratio == -1, TODO. */
3426 if (cst_and_fits_in_hwi (cbase))
3428 offset = - ratio * int_cst_value (cbase);
3429 cost += difference_cost (data,
3430 ubase, integer_zero_node,
3431 &symbol_present, &var_present, &offset,
3432 depends_on);
3434 else if (ratio == 1)
3436 cost += difference_cost (data,
3437 ubase, cbase,
3438 &symbol_present, &var_present, &offset,
3439 depends_on);
3441 else
3443 cost += force_var_cost (data, cbase, depends_on);
3444 cost += add_cost (TYPE_MODE (ctype));
3445 cost += difference_cost (data,
3446 ubase, integer_zero_node,
3447 &symbol_present, &var_present, &offset,
3448 depends_on);
3451 /* If we are after the increment, the value of the candidate is higher by
3452 one iteration. */
3453 if (stmt_after_increment (data->current_loop, cand, at))
3454 offset -= ratio * cstepi;
3456 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3457 (symbol/var/const parts may be omitted). If we are looking for an address,
3458 find the cost of addressing this. */
3459 if (address_p)
3460 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3461 TYPE_MODE (TREE_TYPE (*use->op_p)));
3463 /* Otherwise estimate the costs for computing the expression. */
3464 aratio = ratio > 0 ? ratio : -ratio;
3465 if (!symbol_present && !var_present && !offset)
3467 if (ratio != 1)
3468 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3470 return cost;
3473 if (aratio != 1)
3474 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3476 n_sums = 1;
3477 if (var_present
3478 /* Symbol + offset should be compile-time computable. */
3479 && (symbol_present || offset))
3480 n_sums++;
3482 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3484 fallback:
3486 /* Just get the expression, expand it and measure the cost. */
3487 tree comp = get_computation_at (data->current_loop, use, cand, at);
3489 if (!comp)
3490 return INFTY;
3492 if (address_p)
3493 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3495 return computation_cost (comp);
3499 /* Determines the cost of the computation by that USE is expressed
3500 from induction variable CAND. If ADDRESS_P is true, we just need
3501 to create an address from it, otherwise we want to get it into
3502 register. A set of invariants we depend on is stored in
3503 DEPENDS_ON. */
3505 static unsigned
3506 get_computation_cost (struct ivopts_data *data,
3507 struct iv_use *use, struct iv_cand *cand,
3508 bool address_p, bitmap *depends_on)
3510 return get_computation_cost_at (data,
3511 use, cand, address_p, depends_on, use->stmt);
3514 /* Determines cost of basing replacement of USE on CAND in a generic
3515 expression. */
3517 static bool
3518 determine_use_iv_cost_generic (struct ivopts_data *data,
3519 struct iv_use *use, struct iv_cand *cand)
3521 bitmap depends_on;
3522 unsigned cost;
3524 /* The simple case first -- if we need to express value of the preserved
3525 original biv, the cost is 0. This also prevents us from counting the
3526 cost of increment twice -- once at this use and once in the cost of
3527 the candidate. */
3528 if (cand->pos == IP_ORIGINAL
3529 && cand->incremented_at == use->stmt)
3531 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3532 return true;
3535 cost = get_computation_cost (data, use, cand, false, &depends_on);
3536 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3538 return cost != INFTY;
3541 /* Determines cost of basing replacement of USE on CAND in an address. */
3543 static bool
3544 determine_use_iv_cost_address (struct ivopts_data *data,
3545 struct iv_use *use, struct iv_cand *cand)
3547 bitmap depends_on;
3548 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3550 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3552 return cost != INFTY;
3555 /* Computes value of candidate CAND at position AT in iteration NITER, and
3556 stores it to VAL. */
3558 static void
3559 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3560 aff_tree *val)
3562 aff_tree step, delta, nit;
3563 struct iv *iv = cand->iv;
3564 tree type = TREE_TYPE (iv->base);
3566 tree_to_aff_combination (iv->step, type, &step);
3567 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3568 aff_combination_convert (&nit, type);
3569 aff_combination_mult (&nit, &step, &delta);
3570 if (stmt_after_increment (loop, cand, at))
3571 aff_combination_add (&delta, &step);
3573 tree_to_aff_combination (iv->base, type, val);
3574 aff_combination_add (val, &delta);
3577 /* Returns period of induction variable iv. */
3579 static tree
3580 iv_period (struct iv *iv)
3582 tree step = iv->step, period, type;
3583 tree pow2div;
3585 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3587 /* Period of the iv is gcd (step, type range). Since type range is power
3588 of two, it suffices to determine the maximum power of two that divides
3589 step. */
3590 pow2div = num_ending_zeros (step);
3591 type = unsigned_type_for (TREE_TYPE (step));
3593 period = build_low_bits_mask (type,
3594 (TYPE_PRECISION (type)
3595 - tree_low_cst (pow2div, 1)));
3597 return period;
3600 /* Returns the comparison operator used when eliminating the iv USE. */
3602 static enum tree_code
3603 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3605 struct loop *loop = data->current_loop;
3606 basic_block ex_bb;
3607 edge exit;
3609 ex_bb = bb_for_stmt (use->stmt);
3610 exit = EDGE_SUCC (ex_bb, 0);
3611 if (flow_bb_inside_loop_p (loop, exit->dest))
3612 exit = EDGE_SUCC (ex_bb, 1);
3614 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3617 /* Check whether it is possible to express the condition in USE by comparison
3618 of candidate CAND. If so, store the value compared with to BOUND. */
3620 static bool
3621 may_eliminate_iv (struct ivopts_data *data,
3622 struct iv_use *use, struct iv_cand *cand, tree *bound)
3624 basic_block ex_bb;
3625 edge exit;
3626 tree nit, period;
3627 struct loop *loop = data->current_loop;
3628 aff_tree bnd;
3629 double_int period_value, max_niter;
3631 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3632 return false;
3634 /* For now works only for exits that dominate the loop latch. TODO -- extend
3635 for other conditions inside loop body. */
3636 ex_bb = bb_for_stmt (use->stmt);
3637 if (use->stmt != last_stmt (ex_bb)
3638 || TREE_CODE (use->stmt) != COND_EXPR)
3639 return false;
3640 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3641 return false;
3643 exit = EDGE_SUCC (ex_bb, 0);
3644 if (flow_bb_inside_loop_p (loop, exit->dest))
3645 exit = EDGE_SUCC (ex_bb, 1);
3646 if (flow_bb_inside_loop_p (loop, exit->dest))
3647 return false;
3649 nit = niter_for_exit (data, exit);
3650 if (!nit)
3651 return false;
3653 /* Determine whether we may use the variable to test whether niter iterations
3654 elapsed. This is the case iff the period of the induction variable is
3655 greater than the number of iterations. */
3656 period = iv_period (cand->iv);
3657 if (!period)
3658 return false;
3660 /* Compare the period with the estimate on the number of iterations of the
3661 loop. */
3662 if (!estimated_loop_iterations (loop, true, &max_niter))
3663 return false;
3664 period_value = tree_to_double_int (period);
3665 if (double_int_ucmp (period_value, max_niter) <= 0)
3666 return false;
3668 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3669 *bound = aff_combination_to_tree (&bnd);
3670 return true;
3673 /* Determines cost of basing replacement of USE on CAND in a condition. */
3675 static bool
3676 determine_use_iv_cost_condition (struct ivopts_data *data,
3677 struct iv_use *use, struct iv_cand *cand)
3679 tree bound = NULL_TREE;
3680 struct iv *cmp_iv;
3681 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3682 unsigned elim_cost, express_cost, cost;
3683 bool ok;
3685 /* Only consider real candidates. */
3686 if (!cand->iv)
3688 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3689 return false;
3692 /* Try iv elimination. */
3693 if (may_eliminate_iv (data, use, cand, &bound))
3695 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3696 /* The bound is a loop invariant, so it will be only computed
3697 once. */
3698 elim_cost /= AVG_LOOP_NITER (data->current_loop);
3700 else
3701 elim_cost = INFTY;
3703 /* Try expressing the original giv. If it is compared with an invariant,
3704 note that we cannot get rid of it. */
3705 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3706 gcc_assert (ok);
3708 express_cost = get_computation_cost (data, use, cand, false,
3709 &depends_on_express);
3710 fd_ivopts_data = data;
3711 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3713 /* Choose the better approach. */
3714 if (elim_cost < express_cost)
3716 cost = elim_cost;
3717 depends_on = depends_on_elim;
3718 depends_on_elim = NULL;
3720 else
3722 cost = express_cost;
3723 depends_on = depends_on_express;
3724 depends_on_express = NULL;
3725 bound = NULL_TREE;
3728 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3730 if (depends_on_elim)
3731 BITMAP_FREE (depends_on_elim);
3732 if (depends_on_express)
3733 BITMAP_FREE (depends_on_express);
3735 return cost != INFTY;
3738 /* Determines cost of basing replacement of USE on CAND. Returns false
3739 if USE cannot be based on CAND. */
3741 static bool
3742 determine_use_iv_cost (struct ivopts_data *data,
3743 struct iv_use *use, struct iv_cand *cand)
3745 switch (use->type)
3747 case USE_NONLINEAR_EXPR:
3748 return determine_use_iv_cost_generic (data, use, cand);
3750 case USE_ADDRESS:
3751 return determine_use_iv_cost_address (data, use, cand);
3753 case USE_COMPARE:
3754 return determine_use_iv_cost_condition (data, use, cand);
3756 default:
3757 gcc_unreachable ();
3761 /* Determines costs of basing the use of the iv on an iv candidate. */
3763 static void
3764 determine_use_iv_costs (struct ivopts_data *data)
3766 unsigned i, j;
3767 struct iv_use *use;
3768 struct iv_cand *cand;
3769 bitmap to_clear = BITMAP_ALLOC (NULL);
3771 alloc_use_cost_map (data);
3773 for (i = 0; i < n_iv_uses (data); i++)
3775 use = iv_use (data, i);
3777 if (data->consider_all_candidates)
3779 for (j = 0; j < n_iv_cands (data); j++)
3781 cand = iv_cand (data, j);
3782 determine_use_iv_cost (data, use, cand);
3785 else
3787 bitmap_iterator bi;
3789 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3791 cand = iv_cand (data, j);
3792 if (!determine_use_iv_cost (data, use, cand))
3793 bitmap_set_bit (to_clear, j);
3796 /* Remove the candidates for that the cost is infinite from
3797 the list of related candidates. */
3798 bitmap_and_compl_into (use->related_cands, to_clear);
3799 bitmap_clear (to_clear);
3803 BITMAP_FREE (to_clear);
3805 if (dump_file && (dump_flags & TDF_DETAILS))
3807 fprintf (dump_file, "Use-candidate costs:\n");
3809 for (i = 0; i < n_iv_uses (data); i++)
3811 use = iv_use (data, i);
3813 fprintf (dump_file, "Use %d:\n", i);
3814 fprintf (dump_file, " cand\tcost\tdepends on\n");
3815 for (j = 0; j < use->n_map_members; j++)
3817 if (!use->cost_map[j].cand
3818 || use->cost_map[j].cost == INFTY)
3819 continue;
3821 fprintf (dump_file, " %d\t%d\t",
3822 use->cost_map[j].cand->id,
3823 use->cost_map[j].cost);
3824 if (use->cost_map[j].depends_on)
3825 bitmap_print (dump_file,
3826 use->cost_map[j].depends_on, "","");
3827 fprintf (dump_file, "\n");
3830 fprintf (dump_file, "\n");
3832 fprintf (dump_file, "\n");
3836 /* Determines cost of the candidate CAND. */
3838 static void
3839 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3841 unsigned cost_base, cost_step;
3842 tree base;
3844 if (!cand->iv)
3846 cand->cost = 0;
3847 return;
3850 /* There are two costs associated with the candidate -- its increment
3851 and its initialization. The second is almost negligible for any loop
3852 that rolls enough, so we take it just very little into account. */
3854 base = cand->iv->base;
3855 cost_base = force_var_cost (data, base, NULL);
3856 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3858 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3860 /* Prefer the original iv unless we may gain something by replacing it;
3861 this is not really relevant for artificial ivs created by other
3862 passes. */
3863 if (cand->pos == IP_ORIGINAL
3864 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3865 cand->cost--;
3867 /* Prefer not to insert statements into latch unless there are some
3868 already (so that we do not create unnecessary jumps). */
3869 if (cand->pos == IP_END
3870 && empty_block_p (ip_end_pos (data->current_loop)))
3871 cand->cost++;
3874 /* Determines costs of computation of the candidates. */
3876 static void
3877 determine_iv_costs (struct ivopts_data *data)
3879 unsigned i;
3881 if (dump_file && (dump_flags & TDF_DETAILS))
3883 fprintf (dump_file, "Candidate costs:\n");
3884 fprintf (dump_file, " cand\tcost\n");
3887 for (i = 0; i < n_iv_cands (data); i++)
3889 struct iv_cand *cand = iv_cand (data, i);
3891 determine_iv_cost (data, cand);
3893 if (dump_file && (dump_flags & TDF_DETAILS))
3894 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3897 if (dump_file && (dump_flags & TDF_DETAILS))
3898 fprintf (dump_file, "\n");
3901 /* Calculates cost for having SIZE induction variables. */
3903 static unsigned
3904 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3906 /* We add size to the cost, so that we prefer eliminating ivs
3907 if possible. */
3908 return size + estimate_reg_pressure_cost (size, data->regs_used);
3911 /* For each size of the induction variable set determine the penalty. */
3913 static void
3914 determine_set_costs (struct ivopts_data *data)
3916 unsigned j, n;
3917 tree phi, op;
3918 struct loop *loop = data->current_loop;
3919 bitmap_iterator bi;
3921 /* We use the following model (definitely improvable, especially the
3922 cost function -- TODO):
3924 We estimate the number of registers available (using MD data), name it A.
3926 We estimate the number of registers used by the loop, name it U. This
3927 number is obtained as the number of loop phi nodes (not counting virtual
3928 registers and bivs) + the number of variables from outside of the loop.
3930 We set a reserve R (free regs that are used for temporary computations,
3931 etc.). For now the reserve is a constant 3.
3933 Let I be the number of induction variables.
3935 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3936 make a lot of ivs without a reason).
3937 -- if A - R < U + I <= A, the cost is I * PRES_COST
3938 -- if U + I > A, the cost is I * PRES_COST and
3939 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3941 if (dump_file && (dump_flags & TDF_DETAILS))
3943 fprintf (dump_file, "Global costs:\n");
3944 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3945 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
3946 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3949 n = 0;
3950 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3952 op = PHI_RESULT (phi);
3954 if (!is_gimple_reg (op))
3955 continue;
3957 if (get_iv (data, op))
3958 continue;
3960 n++;
3963 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3965 struct version_info *info = ver_info (data, j);
3967 if (info->inv_id && info->has_nonlin_use)
3968 n++;
3971 data->regs_used = n;
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3973 fprintf (dump_file, " regs_used %d\n", n);
3975 if (dump_file && (dump_flags & TDF_DETAILS))
3977 fprintf (dump_file, " cost for size:\n");
3978 fprintf (dump_file, " ivs\tcost\n");
3979 for (j = 0; j <= 2 * target_avail_regs; j++)
3980 fprintf (dump_file, " %d\t%d\n", j,
3981 ivopts_global_cost_for_size (data, j));
3982 fprintf (dump_file, "\n");
3986 /* Returns true if A is a cheaper cost pair than B. */
3988 static bool
3989 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3991 if (!a)
3992 return false;
3994 if (!b)
3995 return true;
3997 if (a->cost < b->cost)
3998 return true;
4000 if (a->cost > b->cost)
4001 return false;
4003 /* In case the costs are the same, prefer the cheaper candidate. */
4004 if (a->cand->cost < b->cand->cost)
4005 return true;
4007 return false;
4010 /* Computes the cost field of IVS structure. */
4012 static void
4013 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4015 unsigned cost = 0;
4017 cost += ivs->cand_use_cost;
4018 cost += ivs->cand_cost;
4019 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4021 ivs->cost = cost;
4024 /* Remove invariants in set INVS to set IVS. */
4026 static void
4027 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4029 bitmap_iterator bi;
4030 unsigned iid;
4032 if (!invs)
4033 return;
4035 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4037 ivs->n_invariant_uses[iid]--;
4038 if (ivs->n_invariant_uses[iid] == 0)
4039 ivs->n_regs--;
4043 /* Set USE not to be expressed by any candidate in IVS. */
4045 static void
4046 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4047 struct iv_use *use)
4049 unsigned uid = use->id, cid;
4050 struct cost_pair *cp;
4052 cp = ivs->cand_for_use[uid];
4053 if (!cp)
4054 return;
4055 cid = cp->cand->id;
4057 ivs->bad_uses++;
4058 ivs->cand_for_use[uid] = NULL;
4059 ivs->n_cand_uses[cid]--;
4061 if (ivs->n_cand_uses[cid] == 0)
4063 bitmap_clear_bit (ivs->cands, cid);
4064 /* Do not count the pseudocandidates. */
4065 if (cp->cand->iv)
4066 ivs->n_regs--;
4067 ivs->n_cands--;
4068 ivs->cand_cost -= cp->cand->cost;
4070 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4073 ivs->cand_use_cost -= cp->cost;
4075 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4076 iv_ca_recount_cost (data, ivs);
4079 /* Add invariants in set INVS to set IVS. */
4081 static void
4082 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4084 bitmap_iterator bi;
4085 unsigned iid;
4087 if (!invs)
4088 return;
4090 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4092 ivs->n_invariant_uses[iid]++;
4093 if (ivs->n_invariant_uses[iid] == 1)
4094 ivs->n_regs++;
4098 /* Set cost pair for USE in set IVS to CP. */
4100 static void
4101 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4102 struct iv_use *use, struct cost_pair *cp)
4104 unsigned uid = use->id, cid;
4106 if (ivs->cand_for_use[uid] == cp)
4107 return;
4109 if (ivs->cand_for_use[uid])
4110 iv_ca_set_no_cp (data, ivs, use);
4112 if (cp)
4114 cid = cp->cand->id;
4116 ivs->bad_uses--;
4117 ivs->cand_for_use[uid] = cp;
4118 ivs->n_cand_uses[cid]++;
4119 if (ivs->n_cand_uses[cid] == 1)
4121 bitmap_set_bit (ivs->cands, cid);
4122 /* Do not count the pseudocandidates. */
4123 if (cp->cand->iv)
4124 ivs->n_regs++;
4125 ivs->n_cands++;
4126 ivs->cand_cost += cp->cand->cost;
4128 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4131 ivs->cand_use_cost += cp->cost;
4132 iv_ca_set_add_invariants (ivs, cp->depends_on);
4133 iv_ca_recount_cost (data, ivs);
4137 /* Extend set IVS by expressing USE by some of the candidates in it
4138 if possible. */
4140 static void
4141 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4142 struct iv_use *use)
4144 struct cost_pair *best_cp = NULL, *cp;
4145 bitmap_iterator bi;
4146 unsigned i;
4148 gcc_assert (ivs->upto >= use->id);
4150 if (ivs->upto == use->id)
4152 ivs->upto++;
4153 ivs->bad_uses++;
4156 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4158 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4160 if (cheaper_cost_pair (cp, best_cp))
4161 best_cp = cp;
4164 iv_ca_set_cp (data, ivs, use, best_cp);
4167 /* Get cost for assignment IVS. */
4169 static unsigned
4170 iv_ca_cost (struct iv_ca *ivs)
4172 return (ivs->bad_uses ? INFTY : ivs->cost);
4175 /* Returns true if all dependences of CP are among invariants in IVS. */
4177 static bool
4178 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4180 unsigned i;
4181 bitmap_iterator bi;
4183 if (!cp->depends_on)
4184 return true;
4186 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4188 if (ivs->n_invariant_uses[i] == 0)
4189 return false;
4192 return true;
4195 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4196 it before NEXT_CHANGE. */
4198 static struct iv_ca_delta *
4199 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4200 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4202 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4204 change->use = use;
4205 change->old_cp = old_cp;
4206 change->new_cp = new_cp;
4207 change->next_change = next_change;
4209 return change;
4212 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4213 are rewritten. */
4215 static struct iv_ca_delta *
4216 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4218 struct iv_ca_delta *last;
4220 if (!l2)
4221 return l1;
4223 if (!l1)
4224 return l2;
4226 for (last = l1; last->next_change; last = last->next_change)
4227 continue;
4228 last->next_change = l2;
4230 return l1;
4233 /* Returns candidate by that USE is expressed in IVS. */
4235 static struct cost_pair *
4236 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4238 return ivs->cand_for_use[use->id];
4241 /* Reverse the list of changes DELTA, forming the inverse to it. */
4243 static struct iv_ca_delta *
4244 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4246 struct iv_ca_delta *act, *next, *prev = NULL;
4247 struct cost_pair *tmp;
4249 for (act = delta; act; act = next)
4251 next = act->next_change;
4252 act->next_change = prev;
4253 prev = act;
4255 tmp = act->old_cp;
4256 act->old_cp = act->new_cp;
4257 act->new_cp = tmp;
4260 return prev;
4263 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4264 reverted instead. */
4266 static void
4267 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4268 struct iv_ca_delta *delta, bool forward)
4270 struct cost_pair *from, *to;
4271 struct iv_ca_delta *act;
4273 if (!forward)
4274 delta = iv_ca_delta_reverse (delta);
4276 for (act = delta; act; act = act->next_change)
4278 from = act->old_cp;
4279 to = act->new_cp;
4280 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4281 iv_ca_set_cp (data, ivs, act->use, to);
4284 if (!forward)
4285 iv_ca_delta_reverse (delta);
4288 /* Returns true if CAND is used in IVS. */
4290 static bool
4291 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4293 return ivs->n_cand_uses[cand->id] > 0;
4296 /* Returns number of induction variable candidates in the set IVS. */
4298 static unsigned
4299 iv_ca_n_cands (struct iv_ca *ivs)
4301 return ivs->n_cands;
4304 /* Free the list of changes DELTA. */
4306 static void
4307 iv_ca_delta_free (struct iv_ca_delta **delta)
4309 struct iv_ca_delta *act, *next;
4311 for (act = *delta; act; act = next)
4313 next = act->next_change;
4314 free (act);
4317 *delta = NULL;
4320 /* Allocates new iv candidates assignment. */
4322 static struct iv_ca *
4323 iv_ca_new (struct ivopts_data *data)
4325 struct iv_ca *nw = XNEW (struct iv_ca);
4327 nw->upto = 0;
4328 nw->bad_uses = 0;
4329 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4330 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4331 nw->cands = BITMAP_ALLOC (NULL);
4332 nw->n_cands = 0;
4333 nw->n_regs = 0;
4334 nw->cand_use_cost = 0;
4335 nw->cand_cost = 0;
4336 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4337 nw->cost = 0;
4339 return nw;
4342 /* Free memory occupied by the set IVS. */
4344 static void
4345 iv_ca_free (struct iv_ca **ivs)
4347 free ((*ivs)->cand_for_use);
4348 free ((*ivs)->n_cand_uses);
4349 BITMAP_FREE ((*ivs)->cands);
4350 free ((*ivs)->n_invariant_uses);
4351 free (*ivs);
4352 *ivs = NULL;
4355 /* Dumps IVS to FILE. */
4357 static void
4358 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4360 const char *pref = " invariants ";
4361 unsigned i;
4363 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4364 bitmap_print (file, ivs->cands, " candidates ","\n");
4366 for (i = 1; i <= data->max_inv_id; i++)
4367 if (ivs->n_invariant_uses[i])
4369 fprintf (file, "%s%d", pref, i);
4370 pref = ", ";
4372 fprintf (file, "\n");
4375 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4376 new set, and store differences in DELTA. Number of induction variables
4377 in the new set is stored to N_IVS. */
4379 static unsigned
4380 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4381 struct iv_cand *cand, struct iv_ca_delta **delta,
4382 unsigned *n_ivs)
4384 unsigned i, cost;
4385 struct iv_use *use;
4386 struct cost_pair *old_cp, *new_cp;
4388 *delta = NULL;
4389 for (i = 0; i < ivs->upto; i++)
4391 use = iv_use (data, i);
4392 old_cp = iv_ca_cand_for_use (ivs, use);
4394 if (old_cp
4395 && old_cp->cand == cand)
4396 continue;
4398 new_cp = get_use_iv_cost (data, use, cand);
4399 if (!new_cp)
4400 continue;
4402 if (!iv_ca_has_deps (ivs, new_cp))
4403 continue;
4405 if (!cheaper_cost_pair (new_cp, old_cp))
4406 continue;
4408 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4411 iv_ca_delta_commit (data, ivs, *delta, true);
4412 cost = iv_ca_cost (ivs);
4413 if (n_ivs)
4414 *n_ivs = iv_ca_n_cands (ivs);
4415 iv_ca_delta_commit (data, ivs, *delta, false);
4417 return cost;
4420 /* Try narrowing set IVS by removing CAND. Return the cost of
4421 the new set and store the differences in DELTA. */
4423 static unsigned
4424 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4425 struct iv_cand *cand, struct iv_ca_delta **delta)
4427 unsigned i, ci;
4428 struct iv_use *use;
4429 struct cost_pair *old_cp, *new_cp, *cp;
4430 bitmap_iterator bi;
4431 struct iv_cand *cnd;
4432 unsigned cost;
4434 *delta = NULL;
4435 for (i = 0; i < n_iv_uses (data); i++)
4437 use = iv_use (data, i);
4439 old_cp = iv_ca_cand_for_use (ivs, use);
4440 if (old_cp->cand != cand)
4441 continue;
4443 new_cp = NULL;
4445 if (data->consider_all_candidates)
4447 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4449 if (ci == cand->id)
4450 continue;
4452 cnd = iv_cand (data, ci);
4454 cp = get_use_iv_cost (data, use, cnd);
4455 if (!cp)
4456 continue;
4457 if (!iv_ca_has_deps (ivs, cp))
4458 continue;
4460 if (!cheaper_cost_pair (cp, new_cp))
4461 continue;
4463 new_cp = cp;
4466 else
4468 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4470 if (ci == cand->id)
4471 continue;
4473 cnd = iv_cand (data, ci);
4475 cp = get_use_iv_cost (data, use, cnd);
4476 if (!cp)
4477 continue;
4478 if (!iv_ca_has_deps (ivs, cp))
4479 continue;
4481 if (!cheaper_cost_pair (cp, new_cp))
4482 continue;
4484 new_cp = cp;
4488 if (!new_cp)
4490 iv_ca_delta_free (delta);
4491 return INFTY;
4494 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4497 iv_ca_delta_commit (data, ivs, *delta, true);
4498 cost = iv_ca_cost (ivs);
4499 iv_ca_delta_commit (data, ivs, *delta, false);
4501 return cost;
4504 /* Try optimizing the set of candidates IVS by removing candidates different
4505 from to EXCEPT_CAND from it. Return cost of the new set, and store
4506 differences in DELTA. */
4508 static unsigned
4509 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4510 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4512 bitmap_iterator bi;
4513 struct iv_ca_delta *act_delta, *best_delta;
4514 unsigned i, best_cost, acost;
4515 struct iv_cand *cand;
4517 best_delta = NULL;
4518 best_cost = iv_ca_cost (ivs);
4520 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4522 cand = iv_cand (data, i);
4524 if (cand == except_cand)
4525 continue;
4527 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4529 if (acost < best_cost)
4531 best_cost = acost;
4532 iv_ca_delta_free (&best_delta);
4533 best_delta = act_delta;
4535 else
4536 iv_ca_delta_free (&act_delta);
4539 if (!best_delta)
4541 *delta = NULL;
4542 return best_cost;
4545 /* Recurse to possibly remove other unnecessary ivs. */
4546 iv_ca_delta_commit (data, ivs, best_delta, true);
4547 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4548 iv_ca_delta_commit (data, ivs, best_delta, false);
4549 *delta = iv_ca_delta_join (best_delta, *delta);
4550 return best_cost;
4553 /* Tries to extend the sets IVS in the best possible way in order
4554 to express the USE. */
4556 static bool
4557 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4558 struct iv_use *use)
4560 unsigned best_cost, act_cost;
4561 unsigned i;
4562 bitmap_iterator bi;
4563 struct iv_cand *cand;
4564 struct iv_ca_delta *best_delta = NULL, *act_delta;
4565 struct cost_pair *cp;
4567 iv_ca_add_use (data, ivs, use);
4568 best_cost = iv_ca_cost (ivs);
4570 cp = iv_ca_cand_for_use (ivs, use);
4571 if (cp)
4573 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4574 iv_ca_set_no_cp (data, ivs, use);
4577 /* First try important candidates. Only if it fails, try the specific ones.
4578 Rationale -- in loops with many variables the best choice often is to use
4579 just one generic biv. If we added here many ivs specific to the uses,
4580 the optimization algorithm later would be likely to get stuck in a local
4581 minimum, thus causing us to create too many ivs. The approach from
4582 few ivs to more seems more likely to be successful -- starting from few
4583 ivs, replacing an expensive use by a specific iv should always be a
4584 win. */
4585 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4587 cand = iv_cand (data, i);
4589 if (iv_ca_cand_used_p (ivs, cand))
4590 continue;
4592 cp = get_use_iv_cost (data, use, cand);
4593 if (!cp)
4594 continue;
4596 iv_ca_set_cp (data, ivs, use, cp);
4597 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4598 iv_ca_set_no_cp (data, ivs, use);
4599 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4601 if (act_cost < best_cost)
4603 best_cost = act_cost;
4605 iv_ca_delta_free (&best_delta);
4606 best_delta = act_delta;
4608 else
4609 iv_ca_delta_free (&act_delta);
4612 if (best_cost == INFTY)
4614 for (i = 0; i < use->n_map_members; i++)
4616 cp = use->cost_map + i;
4617 cand = cp->cand;
4618 if (!cand)
4619 continue;
4621 /* Already tried this. */
4622 if (cand->important)
4623 continue;
4625 if (iv_ca_cand_used_p (ivs, cand))
4626 continue;
4628 act_delta = NULL;
4629 iv_ca_set_cp (data, ivs, use, cp);
4630 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4631 iv_ca_set_no_cp (data, ivs, use);
4632 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4633 cp, act_delta);
4635 if (act_cost < best_cost)
4637 best_cost = act_cost;
4639 if (best_delta)
4640 iv_ca_delta_free (&best_delta);
4641 best_delta = act_delta;
4643 else
4644 iv_ca_delta_free (&act_delta);
4648 iv_ca_delta_commit (data, ivs, best_delta, true);
4649 iv_ca_delta_free (&best_delta);
4651 return (best_cost != INFTY);
4654 /* Finds an initial assignment of candidates to uses. */
4656 static struct iv_ca *
4657 get_initial_solution (struct ivopts_data *data)
4659 struct iv_ca *ivs = iv_ca_new (data);
4660 unsigned i;
4662 for (i = 0; i < n_iv_uses (data); i++)
4663 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4665 iv_ca_free (&ivs);
4666 return NULL;
4669 return ivs;
4672 /* Tries to improve set of induction variables IVS. */
4674 static bool
4675 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4677 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4678 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4679 struct iv_cand *cand;
4681 /* Try extending the set of induction variables by one. */
4682 for (i = 0; i < n_iv_cands (data); i++)
4684 cand = iv_cand (data, i);
4686 if (iv_ca_cand_used_p (ivs, cand))
4687 continue;
4689 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4690 if (!act_delta)
4691 continue;
4693 /* If we successfully added the candidate and the set is small enough,
4694 try optimizing it by removing other candidates. */
4695 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4697 iv_ca_delta_commit (data, ivs, act_delta, true);
4698 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4699 iv_ca_delta_commit (data, ivs, act_delta, false);
4700 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4703 if (acost < best_cost)
4705 best_cost = acost;
4706 iv_ca_delta_free (&best_delta);
4707 best_delta = act_delta;
4709 else
4710 iv_ca_delta_free (&act_delta);
4713 if (!best_delta)
4715 /* Try removing the candidates from the set instead. */
4716 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4718 /* Nothing more we can do. */
4719 if (!best_delta)
4720 return false;
4723 iv_ca_delta_commit (data, ivs, best_delta, true);
4724 gcc_assert (best_cost == iv_ca_cost (ivs));
4725 iv_ca_delta_free (&best_delta);
4726 return true;
4729 /* Attempts to find the optimal set of induction variables. We do simple
4730 greedy heuristic -- we try to replace at most one candidate in the selected
4731 solution and remove the unused ivs while this improves the cost. */
4733 static struct iv_ca *
4734 find_optimal_iv_set (struct ivopts_data *data)
4736 unsigned i;
4737 struct iv_ca *set;
4738 struct iv_use *use;
4740 /* Get the initial solution. */
4741 set = get_initial_solution (data);
4742 if (!set)
4744 if (dump_file && (dump_flags & TDF_DETAILS))
4745 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4746 return NULL;
4749 if (dump_file && (dump_flags & TDF_DETAILS))
4751 fprintf (dump_file, "Initial set of candidates:\n");
4752 iv_ca_dump (data, dump_file, set);
4755 while (try_improve_iv_set (data, set))
4757 if (dump_file && (dump_flags & TDF_DETAILS))
4759 fprintf (dump_file, "Improved to:\n");
4760 iv_ca_dump (data, dump_file, set);
4764 if (dump_file && (dump_flags & TDF_DETAILS))
4765 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4767 for (i = 0; i < n_iv_uses (data); i++)
4769 use = iv_use (data, i);
4770 use->selected = iv_ca_cand_for_use (set, use)->cand;
4773 return set;
4776 /* Creates a new induction variable corresponding to CAND. */
4778 static void
4779 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4781 block_stmt_iterator incr_pos;
4782 tree base;
4783 bool after = false;
4785 if (!cand->iv)
4786 return;
4788 switch (cand->pos)
4790 case IP_NORMAL:
4791 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4792 break;
4794 case IP_END:
4795 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4796 after = true;
4797 break;
4799 case IP_ORIGINAL:
4800 /* Mark that the iv is preserved. */
4801 name_info (data, cand->var_before)->preserve_biv = true;
4802 name_info (data, cand->var_after)->preserve_biv = true;
4804 /* Rewrite the increment so that it uses var_before directly. */
4805 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4807 return;
4810 gimple_add_tmp_var (cand->var_before);
4811 add_referenced_var (cand->var_before);
4813 base = unshare_expr (cand->iv->base);
4815 create_iv (base, unshare_expr (cand->iv->step),
4816 cand->var_before, data->current_loop,
4817 &incr_pos, after, &cand->var_before, &cand->var_after);
4820 /* Creates new induction variables described in SET. */
4822 static void
4823 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4825 unsigned i;
4826 struct iv_cand *cand;
4827 bitmap_iterator bi;
4829 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4831 cand = iv_cand (data, i);
4832 create_new_iv (data, cand);
4836 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4837 is true, remove also the ssa name defined by the statement. */
4839 static void
4840 remove_statement (tree stmt, bool including_defined_name)
4842 if (TREE_CODE (stmt) == PHI_NODE)
4844 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4846 else
4848 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4850 bsi_remove (&bsi, true);
4851 release_defs (stmt);
4855 /* Rewrites USE (definition of iv used in a nonlinear expression)
4856 using candidate CAND. */
4858 static void
4859 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4860 struct iv_use *use, struct iv_cand *cand)
4862 tree comp;
4863 tree op, tgt, ass;
4864 block_stmt_iterator bsi;
4866 /* An important special case -- if we are asked to express value of
4867 the original iv by itself, just exit; there is no need to
4868 introduce a new computation (that might also need casting the
4869 variable to unsigned and back). */
4870 if (cand->pos == IP_ORIGINAL
4871 && cand->incremented_at == use->stmt)
4873 tree step, ctype, utype;
4874 enum tree_code incr_code = PLUS_EXPR;
4876 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4877 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4879 step = cand->iv->step;
4880 ctype = TREE_TYPE (step);
4881 utype = TREE_TYPE (cand->var_after);
4882 if (TREE_CODE (step) == NEGATE_EXPR)
4884 incr_code = MINUS_EXPR;
4885 step = TREE_OPERAND (step, 0);
4888 /* Check whether we may leave the computation unchanged.
4889 This is the case only if it does not rely on other
4890 computations in the loop -- otherwise, the computation
4891 we rely upon may be removed in remove_unused_ivs,
4892 thus leading to ICE. */
4893 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4894 if (TREE_CODE (op) == PLUS_EXPR
4895 || TREE_CODE (op) == MINUS_EXPR
4896 || TREE_CODE (op) == POINTER_PLUS_EXPR)
4898 if (TREE_OPERAND (op, 0) == cand->var_before)
4899 op = TREE_OPERAND (op, 1);
4900 else if (TREE_CODE (op) != MINUS_EXPR
4901 && TREE_OPERAND (op, 1) == cand->var_before)
4902 op = TREE_OPERAND (op, 0);
4903 else
4904 op = NULL_TREE;
4906 else
4907 op = NULL_TREE;
4909 if (op
4910 && (TREE_CODE (op) == INTEGER_CST
4911 || operand_equal_p (op, step, 0)))
4912 return;
4914 /* Otherwise, add the necessary computations to express
4915 the iv. */
4916 op = fold_convert (ctype, cand->var_before);
4917 comp = fold_convert (utype,
4918 build2 (incr_code, ctype, op,
4919 unshare_expr (step)));
4921 else
4923 comp = get_computation (data->current_loop, use, cand);
4924 gcc_assert (comp != NULL_TREE);
4927 switch (TREE_CODE (use->stmt))
4929 case PHI_NODE:
4930 tgt = PHI_RESULT (use->stmt);
4932 /* If we should keep the biv, do not replace it. */
4933 if (name_info (data, tgt)->preserve_biv)
4934 return;
4936 bsi = bsi_after_labels (bb_for_stmt (use->stmt));
4937 break;
4939 case GIMPLE_MODIFY_STMT:
4940 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4941 bsi = bsi_for_stmt (use->stmt);
4942 break;
4944 default:
4945 gcc_unreachable ();
4948 op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
4949 true, BSI_SAME_STMT);
4951 if (TREE_CODE (use->stmt) == PHI_NODE)
4953 ass = build_gimple_modify_stmt (tgt, op);
4954 bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
4955 remove_statement (use->stmt, false);
4956 SSA_NAME_DEF_STMT (tgt) = ass;
4958 else
4959 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4962 /* Replaces ssa name in index IDX by its basic variable. Callback for
4963 for_each_index. */
4965 static bool
4966 idx_remove_ssa_names (tree base, tree *idx,
4967 void *data ATTRIBUTE_UNUSED)
4969 tree *op;
4971 if (TREE_CODE (*idx) == SSA_NAME)
4972 *idx = SSA_NAME_VAR (*idx);
4974 if (TREE_CODE (base) == ARRAY_REF)
4976 op = &TREE_OPERAND (base, 2);
4977 if (*op
4978 && TREE_CODE (*op) == SSA_NAME)
4979 *op = SSA_NAME_VAR (*op);
4980 op = &TREE_OPERAND (base, 3);
4981 if (*op
4982 && TREE_CODE (*op) == SSA_NAME)
4983 *op = SSA_NAME_VAR (*op);
4986 return true;
4989 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4991 static tree
4992 unshare_and_remove_ssa_names (tree ref)
4994 ref = unshare_expr (ref);
4995 for_each_index (&ref, idx_remove_ssa_names, NULL);
4997 return ref;
5000 /* Extract the alias analysis info for the memory reference REF. There are
5001 several ways how this information may be stored and what precisely is
5002 its semantics depending on the type of the reference, but there always is
5003 somewhere hidden one _DECL node that is used to determine the set of
5004 virtual operands for the reference. The code below deciphers this jungle
5005 and extracts this single useful piece of information. */
5007 static tree
5008 get_ref_tag (tree ref, tree orig)
5010 tree var = get_base_address (ref);
5011 tree aref = NULL_TREE, tag, sv;
5012 HOST_WIDE_INT offset, size, maxsize;
5014 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5016 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5017 if (ref)
5018 break;
5021 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5022 return aref;
5024 if (!var)
5025 return NULL_TREE;
5027 if (TREE_CODE (var) == INDIRECT_REF)
5029 /* If the base is a dereference of a pointer, first check its name memory
5030 tag. If it does not have one, use its symbol memory tag. */
5031 var = TREE_OPERAND (var, 0);
5032 if (TREE_CODE (var) != SSA_NAME)
5033 return NULL_TREE;
5035 if (SSA_NAME_PTR_INFO (var))
5037 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5038 if (tag)
5039 return tag;
5042 var = SSA_NAME_VAR (var);
5043 tag = symbol_mem_tag (var);
5044 gcc_assert (tag != NULL_TREE);
5045 return tag;
5047 else
5049 if (!DECL_P (var))
5050 return NULL_TREE;
5052 tag = symbol_mem_tag (var);
5053 if (tag)
5054 return tag;
5056 return var;
5060 /* Copies the reference information from OLD_REF to NEW_REF. */
5062 static void
5063 copy_ref_info (tree new_ref, tree old_ref)
5065 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5066 copy_mem_ref_info (new_ref, old_ref);
5067 else
5069 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5070 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5074 /* Rewrites USE (address that is an iv) using candidate CAND. */
5076 static void
5077 rewrite_use_address (struct ivopts_data *data,
5078 struct iv_use *use, struct iv_cand *cand)
5080 aff_tree aff;
5081 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5082 tree ref;
5083 bool ok;
5085 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5086 gcc_assert (ok);
5087 unshare_aff_combination (&aff);
5089 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5090 copy_ref_info (ref, *use->op_p);
5091 *use->op_p = ref;
5094 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5095 candidate CAND. */
5097 static void
5098 rewrite_use_compare (struct ivopts_data *data,
5099 struct iv_use *use, struct iv_cand *cand)
5101 tree comp, *var_p, op, bound;
5102 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5103 enum tree_code compare;
5104 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5105 bool ok;
5107 bound = cp->value;
5108 if (bound)
5110 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5111 tree var_type = TREE_TYPE (var);
5113 compare = iv_elimination_compare (data, use);
5114 bound = unshare_expr (fold_convert (var_type, bound));
5115 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5116 true, BSI_SAME_STMT);
5118 *use->op_p = build2 (compare, boolean_type_node, var, op);
5119 return;
5122 /* The induction variable elimination failed; just express the original
5123 giv. */
5124 comp = get_computation (data->current_loop, use, cand);
5125 gcc_assert (comp != NULL_TREE);
5127 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5128 gcc_assert (ok);
5130 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5131 true, BSI_SAME_STMT);
5134 /* Rewrites USE using candidate CAND. */
5136 static void
5137 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5139 push_stmt_changes (&use->stmt);
5141 switch (use->type)
5143 case USE_NONLINEAR_EXPR:
5144 rewrite_use_nonlinear_expr (data, use, cand);
5145 break;
5147 case USE_ADDRESS:
5148 rewrite_use_address (data, use, cand);
5149 break;
5151 case USE_COMPARE:
5152 rewrite_use_compare (data, use, cand);
5153 break;
5155 default:
5156 gcc_unreachable ();
5159 pop_stmt_changes (&use->stmt);
5162 /* Rewrite the uses using the selected induction variables. */
5164 static void
5165 rewrite_uses (struct ivopts_data *data)
5167 unsigned i;
5168 struct iv_cand *cand;
5169 struct iv_use *use;
5171 for (i = 0; i < n_iv_uses (data); i++)
5173 use = iv_use (data, i);
5174 cand = use->selected;
5175 gcc_assert (cand);
5177 rewrite_use (data, use, cand);
5181 /* Removes the ivs that are not used after rewriting. */
5183 static void
5184 remove_unused_ivs (struct ivopts_data *data)
5186 unsigned j;
5187 bitmap_iterator bi;
5189 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5191 struct version_info *info;
5193 info = ver_info (data, j);
5194 if (info->iv
5195 && !integer_zerop (info->iv->step)
5196 && !info->inv_id
5197 && !info->iv->have_use_for
5198 && !info->preserve_biv)
5199 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5203 /* Frees data allocated by the optimization of a single loop. */
5205 static void
5206 free_loop_data (struct ivopts_data *data)
5208 unsigned i, j;
5209 bitmap_iterator bi;
5210 tree obj;
5212 if (data->niters)
5214 pointer_map_destroy (data->niters);
5215 data->niters = NULL;
5218 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5220 struct version_info *info;
5222 info = ver_info (data, i);
5223 if (info->iv)
5224 free (info->iv);
5225 info->iv = NULL;
5226 info->has_nonlin_use = false;
5227 info->preserve_biv = false;
5228 info->inv_id = 0;
5230 bitmap_clear (data->relevant);
5231 bitmap_clear (data->important_candidates);
5233 for (i = 0; i < n_iv_uses (data); i++)
5235 struct iv_use *use = iv_use (data, i);
5237 free (use->iv);
5238 BITMAP_FREE (use->related_cands);
5239 for (j = 0; j < use->n_map_members; j++)
5240 if (use->cost_map[j].depends_on)
5241 BITMAP_FREE (use->cost_map[j].depends_on);
5242 free (use->cost_map);
5243 free (use);
5245 VEC_truncate (iv_use_p, data->iv_uses, 0);
5247 for (i = 0; i < n_iv_cands (data); i++)
5249 struct iv_cand *cand = iv_cand (data, i);
5251 if (cand->iv)
5252 free (cand->iv);
5253 if (cand->depends_on)
5254 BITMAP_FREE (cand->depends_on);
5255 free (cand);
5257 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5259 if (data->version_info_size < num_ssa_names)
5261 data->version_info_size = 2 * num_ssa_names;
5262 free (data->version_info);
5263 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5266 data->max_inv_id = 0;
5268 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5269 SET_DECL_RTL (obj, NULL_RTX);
5271 VEC_truncate (tree, decl_rtl_to_reset, 0);
5274 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5275 loop tree. */
5277 static void
5278 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5280 free_loop_data (data);
5281 free (data->version_info);
5282 BITMAP_FREE (data->relevant);
5283 BITMAP_FREE (data->important_candidates);
5285 VEC_free (tree, heap, decl_rtl_to_reset);
5286 VEC_free (iv_use_p, heap, data->iv_uses);
5287 VEC_free (iv_cand_p, heap, data->iv_candidates);
5290 /* Optimizes the LOOP. Returns true if anything changed. */
5292 static bool
5293 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5295 bool changed = false;
5296 struct iv_ca *iv_ca;
5297 edge exit;
5299 gcc_assert (!data->niters);
5300 data->current_loop = loop;
5302 if (dump_file && (dump_flags & TDF_DETAILS))
5304 fprintf (dump_file, "Processing loop %d\n", loop->num);
5306 exit = single_dom_exit (loop);
5307 if (exit)
5309 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5310 exit->src->index, exit->dest->index);
5311 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5312 fprintf (dump_file, "\n");
5315 fprintf (dump_file, "\n");
5318 /* For each ssa name determines whether it behaves as an induction variable
5319 in some loop. */
5320 if (!find_induction_variables (data))
5321 goto finish;
5323 /* Finds interesting uses (item 1). */
5324 find_interesting_uses (data);
5325 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5326 goto finish;
5328 /* Finds candidates for the induction variables (item 2). */
5329 find_iv_candidates (data);
5331 /* Calculates the costs (item 3, part 1). */
5332 determine_use_iv_costs (data);
5333 determine_iv_costs (data);
5334 determine_set_costs (data);
5336 /* Find the optimal set of induction variables (item 3, part 2). */
5337 iv_ca = find_optimal_iv_set (data);
5338 if (!iv_ca)
5339 goto finish;
5340 changed = true;
5342 /* Create the new induction variables (item 4, part 1). */
5343 create_new_ivs (data, iv_ca);
5344 iv_ca_free (&iv_ca);
5346 /* Rewrite the uses (item 4, part 2). */
5347 rewrite_uses (data);
5349 /* Remove the ivs that are unused after rewriting. */
5350 remove_unused_ivs (data);
5352 /* We have changed the structure of induction variables; it might happen
5353 that definitions in the scev database refer to some of them that were
5354 eliminated. */
5355 scev_reset ();
5357 finish:
5358 free_loop_data (data);
5360 return changed;
5363 /* Main entry point. Optimizes induction variables in loops. */
5365 void
5366 tree_ssa_iv_optimize (void)
5368 struct loop *loop;
5369 struct ivopts_data data;
5370 loop_iterator li;
5372 tree_ssa_iv_optimize_init (&data);
5374 /* Optimize the loops starting with the innermost ones. */
5375 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5377 if (dump_file && (dump_flags & TDF_DETAILS))
5378 flow_loop_dump (loop, dump_file, NULL, 1);
5380 tree_ssa_iv_optimize_loop (&data, loop);
5383 tree_ssa_iv_optimize_finalize (&data);