* basic-block.h (ei_safe_edge): New function.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob224a68ff3bc13a591a538011bbdff45c3c6f0fe2
1 /* Induction variable optimizations.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
126 struct loop_data
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
134 /* Types of uses. */
135 enum use_type
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
144 struct cost_pair
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
149 preserved. */
152 /* Use. */
153 struct iv_use
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
198 struct ivopts_data
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
215 /* The uses of induction variables. */
216 varray_type iv_uses;
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
222 use. */
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
238 here. */
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
279 return loop->aux;
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
284 static edge
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
289 if (!exit)
290 return NULL;
292 if (!just_once_each_iteration_p (loop, exit->src))
293 return NULL;
295 return exit;
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
301 void
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 if (iv->step)
310 fprintf (file, " base ");
311 print_generic_expr (file, iv->base, TDF_SLIM);
312 fprintf (file, "\n");
314 fprintf (file, " step ");
315 print_generic_expr (file, iv->step, TDF_SLIM);
316 fprintf (file, "\n");
318 else
320 fprintf (file, " invariant ");
321 print_generic_expr (file, iv->base, TDF_SLIM);
322 fprintf (file, "\n");
325 if (iv->biv_p)
326 fprintf (file, " is a biv\n");
329 /* Dumps information about the USE to FILE. */
331 extern void dump_use (FILE *, struct iv_use *);
332 void
333 dump_use (FILE *file, struct iv_use *use)
335 struct iv *iv = use->iv;
337 fprintf (file, "use %d\n", use->id);
339 switch (use->type)
341 case USE_NONLINEAR_EXPR:
342 fprintf (file, " generic\n");
343 break;
345 case USE_OUTER:
346 fprintf (file, " outside\n");
347 break;
349 case USE_ADDRESS:
350 fprintf (file, " address\n");
351 break;
353 case USE_COMPARE:
354 fprintf (file, " compare\n");
355 break;
357 default:
358 gcc_unreachable ();
361 fprintf (file, " in statement ");
362 print_generic_expr (file, use->stmt, TDF_SLIM);
363 fprintf (file, "\n");
365 fprintf (file, " at position ");
366 if (use->op_p)
367 print_generic_expr (file, *use->op_p, TDF_SLIM);
368 fprintf (file, "\n");
370 if (iv->step)
372 fprintf (file, " base ");
373 print_generic_expr (file, iv->base, TDF_SLIM);
374 fprintf (file, "\n");
376 fprintf (file, " step ");
377 print_generic_expr (file, iv->step, TDF_SLIM);
378 fprintf (file, "\n");
380 else
382 fprintf (file, " invariant ");
383 print_generic_expr (file, iv->base, TDF_SLIM);
384 fprintf (file, "\n");
387 fprintf (file, " related candidates ");
388 dump_bitmap (file, use->related_cands);
391 /* Dumps information about the uses to FILE. */
393 extern void dump_uses (FILE *, struct ivopts_data *);
394 void
395 dump_uses (FILE *file, struct ivopts_data *data)
397 unsigned i;
398 struct iv_use *use;
400 for (i = 0; i < n_iv_uses (data); i++)
402 use = iv_use (data, i);
404 dump_use (file, use);
405 fprintf (file, "\n");
409 /* Dumps information about induction variable candidate CAND to FILE. */
411 extern void dump_cand (FILE *, struct iv_cand *);
412 void
413 dump_cand (FILE *file, struct iv_cand *cand)
415 struct iv *iv = cand->iv;
417 fprintf (file, "candidate %d%s\n",
418 cand->id, cand->important ? " (important)" : "");
420 if (!iv)
422 fprintf (file, " final value replacement\n");
423 return;
426 switch (cand->pos)
428 case IP_NORMAL:
429 fprintf (file, " incremented before exit test\n");
430 break;
432 case IP_END:
433 fprintf (file, " incremented at end\n");
434 break;
436 case IP_ORIGINAL:
437 fprintf (file, " original biv\n");
438 break;
441 if (iv->step)
443 fprintf (file, " base ");
444 print_generic_expr (file, iv->base, TDF_SLIM);
445 fprintf (file, "\n");
447 fprintf (file, " step ");
448 print_generic_expr (file, iv->step, TDF_SLIM);
449 fprintf (file, "\n");
451 else
453 fprintf (file, " invariant ");
454 print_generic_expr (file, iv->base, TDF_SLIM);
455 fprintf (file, "\n");
459 /* Returns the info for ssa version VER. */
461 static inline struct version_info *
462 ver_info (struct ivopts_data *data, unsigned ver)
464 return data->version_info + ver;
467 /* Returns the info for ssa name NAME. */
469 static inline struct version_info *
470 name_info (struct ivopts_data *data, tree name)
472 return ver_info (data, SSA_NAME_VERSION (name));
475 /* Checks whether there exists number X such that X * B = A, counting modulo
476 2^BITS. */
478 static bool
479 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
480 HOST_WIDE_INT *x)
482 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
483 unsigned HOST_WIDE_INT inv, ex, val;
484 unsigned i;
486 a &= mask;
487 b &= mask;
489 /* First divide the whole equation by 2 as long as possible. */
490 while (!(a & 1) && !(b & 1))
492 a >>= 1;
493 b >>= 1;
494 bits--;
495 mask >>= 1;
498 if (!(b & 1))
500 /* If b is still even, a is odd and there is no such x. */
501 return false;
504 /* Find the inverse of b. We compute it as
505 b^(2^(bits - 1) - 1) (mod 2^bits). */
506 inv = 1;
507 ex = b;
508 for (i = 0; i < bits - 1; i++)
510 inv = (inv * ex) & mask;
511 ex = (ex * ex) & mask;
514 val = (a * inv) & mask;
516 gcc_assert (((val * b) & mask) == a);
518 if ((val >> (bits - 1)) & 1)
519 val |= ~mask;
521 *x = val;
523 return true;
526 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
527 emitted in LOOP. */
529 static bool
530 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
532 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
534 gcc_assert (bb);
536 if (sbb == loop->latch)
537 return true;
539 if (sbb != bb)
540 return false;
542 return stmt == last_stmt (bb);
545 /* Returns true if STMT if after the place where the original induction
546 variable CAND is incremented. */
548 static bool
549 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
551 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
552 basic_block stmt_bb = bb_for_stmt (stmt);
553 block_stmt_iterator bsi;
555 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
556 return false;
558 if (stmt_bb != cand_bb)
559 return true;
561 /* Scan the block from the end, since the original ivs are usually
562 incremented at the end of the loop body. */
563 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
565 if (bsi_stmt (bsi) == cand->incremented_at)
566 return false;
567 if (bsi_stmt (bsi) == stmt)
568 return true;
572 /* Returns true if STMT if after the place where the induction variable
573 CAND is incremented in LOOP. */
575 static bool
576 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
578 switch (cand->pos)
580 case IP_END:
581 return false;
583 case IP_NORMAL:
584 return stmt_after_ip_normal_pos (loop, stmt);
586 case IP_ORIGINAL:
587 return stmt_after_ip_original_pos (cand, stmt);
589 default:
590 gcc_unreachable ();
594 /* Initializes data structures used by the iv optimization pass, stored
595 in DATA. LOOPS is the loop tree. */
597 static void
598 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
600 unsigned i;
602 data->version_info_size = 2 * num_ssa_names;
603 data->version_info = xcalloc (data->version_info_size,
604 sizeof (struct version_info));
605 data->relevant = BITMAP_XMALLOC ();
606 data->max_inv_id = 0;
608 for (i = 1; i < loops->num; i++)
609 if (loops->parray[i])
610 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
612 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
613 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
614 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
617 /* Allocates an induction variable with given initial value BASE and step STEP
618 for loop LOOP. */
620 static struct iv *
621 alloc_iv (tree base, tree step)
623 struct iv *iv = xcalloc (1, sizeof (struct iv));
625 if (step && integer_zerop (step))
626 step = NULL_TREE;
628 iv->base = base;
629 iv->step = step;
630 iv->biv_p = false;
631 iv->have_use_for = false;
632 iv->use_id = 0;
633 iv->ssa_name = NULL_TREE;
635 return iv;
638 /* Sets STEP and BASE for induction variable IV. */
640 static void
641 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
643 struct version_info *info = name_info (data, iv);
645 gcc_assert (!info->iv);
647 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
648 info->iv = alloc_iv (base, step);
649 info->iv->ssa_name = iv;
652 /* Finds induction variable declaration for VAR. */
654 static struct iv *
655 get_iv (struct ivopts_data *data, tree var)
657 basic_block bb;
659 if (!name_info (data, var)->iv)
661 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
663 if (!bb
664 || !flow_bb_inside_loop_p (data->current_loop, bb))
665 set_iv (data, var, var, NULL_TREE);
668 return name_info (data, var)->iv;
671 /* Determines the step of a biv defined in PHI. */
673 static tree
674 determine_biv_step (tree phi)
676 struct loop *loop = bb_for_stmt (phi)->loop_father;
677 tree name = PHI_RESULT (phi), base, step;
678 tree type = TREE_TYPE (name);
680 if (!is_gimple_reg (name))
681 return NULL_TREE;
683 if (!simple_iv (loop, phi, name, &base, &step))
684 return NULL_TREE;
686 if (!step)
687 return build_int_cst (type, 0);
689 return step;
692 /* Returns false if INDEX is a ssa name that occurs in an
693 abnormal phi node. Callback for for_each_index. */
695 static bool
696 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
697 void *data ATTRIBUTE_UNUSED)
699 if (TREE_CODE (*index) != SSA_NAME)
700 return true;
702 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
705 /* Returns true if EXPR contains a ssa name that occurs in an
706 abnormal phi node. */
708 static bool
709 contains_abnormal_ssa_name_p (tree expr)
711 enum tree_code code = TREE_CODE (expr);
712 char class = TREE_CODE_CLASS (code);
714 if (code == SSA_NAME)
715 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
717 if (code == INTEGER_CST
718 || is_gimple_min_invariant (expr))
719 return false;
721 if (code == ADDR_EXPR)
722 return !for_each_index (&TREE_OPERAND (expr, 1),
723 idx_contains_abnormal_ssa_name_p,
724 NULL);
726 switch (class)
728 case '2':
729 case '<':
730 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
731 return true;
733 /* Fallthru. */
734 case '1':
735 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
736 return true;
738 break;
740 default:
741 gcc_unreachable ();
744 return false;
747 /* Finds basic ivs. */
749 static bool
750 find_bivs (struct ivopts_data *data)
752 tree phi, step, type, base;
753 bool found = false;
754 struct loop *loop = data->current_loop;
756 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
758 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
759 continue;
761 step = determine_biv_step (phi);
763 if (!step)
764 continue;
765 if (cst_and_fits_in_hwi (step)
766 && int_cst_value (step) == 0)
767 continue;
769 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
770 if (contains_abnormal_ssa_name_p (base))
771 continue;
773 type = TREE_TYPE (PHI_RESULT (phi));
774 base = fold_convert (type, base);
775 step = fold_convert (type, step);
777 /* FIXME: We do not handle induction variables whose step does
778 not satisfy cst_and_fits_in_hwi. */
779 if (!cst_and_fits_in_hwi (step))
780 continue;
782 set_iv (data, PHI_RESULT (phi), base, step);
783 found = true;
786 return found;
789 /* Marks basic ivs. */
791 static void
792 mark_bivs (struct ivopts_data *data)
794 tree phi, var;
795 struct iv *iv, *incr_iv;
796 struct loop *loop = data->current_loop;
797 basic_block incr_bb;
799 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
801 iv = get_iv (data, PHI_RESULT (phi));
802 if (!iv)
803 continue;
805 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
806 incr_iv = get_iv (data, var);
807 if (!incr_iv)
808 continue;
810 /* If the increment is in the subloop, ignore it. */
811 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
812 if (incr_bb->loop_father != data->current_loop
813 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
814 continue;
816 iv->biv_p = true;
817 incr_iv->biv_p = true;
821 /* Checks whether STMT defines a linear induction variable and stores its
822 parameters to BASE and STEP. */
824 static bool
825 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
826 tree *base, tree *step)
828 tree lhs;
829 struct loop *loop = data->current_loop;
831 *base = NULL_TREE;
832 *step = NULL_TREE;
834 if (TREE_CODE (stmt) != MODIFY_EXPR)
835 return false;
837 lhs = TREE_OPERAND (stmt, 0);
838 if (TREE_CODE (lhs) != SSA_NAME)
839 return false;
841 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
842 return false;
844 /* FIXME: We do not handle induction variables whose step does
845 not satisfy cst_and_fits_in_hwi. */
846 if (!zero_p (*step)
847 && !cst_and_fits_in_hwi (*step))
848 return false;
850 if (contains_abnormal_ssa_name_p (*base))
851 return false;
853 return true;
856 /* Finds general ivs in statement STMT. */
858 static void
859 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
861 tree base, step;
863 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
864 return;
866 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
869 /* Finds general ivs in basic block BB. */
871 static void
872 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
874 block_stmt_iterator bsi;
876 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
877 find_givs_in_stmt (data, bsi_stmt (bsi));
880 /* Finds general ivs. */
882 static void
883 find_givs (struct ivopts_data *data)
885 struct loop *loop = data->current_loop;
886 basic_block *body = get_loop_body_in_dom_order (loop);
887 unsigned i;
889 for (i = 0; i < loop->num_nodes; i++)
890 find_givs_in_bb (data, body[i]);
891 free (body);
894 /* Determine the number of iterations of the current loop. */
896 static void
897 determine_number_of_iterations (struct ivopts_data *data)
899 struct loop *loop = data->current_loop;
900 edge exit = single_dom_exit (loop);
902 if (!exit)
903 return;
905 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
908 /* For each ssa name defined in LOOP determines whether it is an induction
909 variable and if so, its initial value and step. */
911 static bool
912 find_induction_variables (struct ivopts_data *data)
914 unsigned i;
915 struct loop *loop = data->current_loop;
917 if (!find_bivs (data))
918 return false;
920 find_givs (data);
921 mark_bivs (data);
922 determine_number_of_iterations (data);
924 if (dump_file && (dump_flags & TDF_DETAILS))
926 if (loop_data (loop)->niter.niter)
928 fprintf (dump_file, " number of iterations ");
929 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
930 TDF_SLIM);
931 fprintf (dump_file, "\n");
933 fprintf (dump_file, " may be zero if ");
934 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
935 TDF_SLIM);
936 fprintf (dump_file, "\n");
938 fprintf (dump_file, " bogus unless ");
939 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
940 TDF_SLIM);
941 fprintf (dump_file, "\n");
942 fprintf (dump_file, "\n");
945 fprintf (dump_file, "Induction variables:\n\n");
947 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
949 if (ver_info (data, i)->iv)
950 dump_iv (dump_file, ver_info (data, i)->iv);
954 return true;
957 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
959 static struct iv_use *
960 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
961 tree stmt, enum use_type use_type)
963 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
965 use->id = n_iv_uses (data);
966 use->type = use_type;
967 use->iv = iv;
968 use->stmt = stmt;
969 use->op_p = use_p;
970 use->related_cands = BITMAP_XMALLOC ();
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 dump_use (dump_file, use);
975 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
977 return use;
980 /* Checks whether OP is a loop-level invariant and if so, records it.
981 NONLINEAR_USE is true if the invariant is used in a way we do not
982 handle specially. */
984 static void
985 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
987 basic_block bb;
988 struct version_info *info;
990 if (TREE_CODE (op) != SSA_NAME
991 || !is_gimple_reg (op))
992 return;
994 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
995 if (bb
996 && flow_bb_inside_loop_p (data->current_loop, bb))
997 return;
999 info = name_info (data, op);
1000 info->name = op;
1001 info->has_nonlin_use |= nonlinear_use;
1002 if (!info->inv_id)
1003 info->inv_id = ++data->max_inv_id;
1004 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1007 /* Checks whether the use OP is interesting and if so, records it
1008 as TYPE. */
1010 static struct iv_use *
1011 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1012 enum use_type type)
1014 struct iv *iv;
1015 struct iv *civ;
1016 tree stmt;
1017 struct iv_use *use;
1019 if (TREE_CODE (op) != SSA_NAME)
1020 return NULL;
1022 iv = get_iv (data, op);
1023 if (!iv)
1024 return NULL;
1026 if (iv->have_use_for)
1028 use = iv_use (data, iv->use_id);
1030 gcc_assert (use->type == USE_NONLINEAR_EXPR
1031 || use->type == USE_OUTER);
1033 if (type == USE_NONLINEAR_EXPR)
1034 use->type = USE_NONLINEAR_EXPR;
1035 return use;
1038 if (zero_p (iv->step))
1040 record_invariant (data, op, true);
1041 return NULL;
1043 iv->have_use_for = true;
1045 civ = xmalloc (sizeof (struct iv));
1046 *civ = *iv;
1048 stmt = SSA_NAME_DEF_STMT (op);
1049 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1050 || TREE_CODE (stmt) == MODIFY_EXPR);
1052 use = record_use (data, NULL, civ, stmt, type);
1053 iv->use_id = use->id;
1055 return use;
1058 /* Checks whether the use OP is interesting and if so, records it. */
1060 static struct iv_use *
1061 find_interesting_uses_op (struct ivopts_data *data, tree op)
1063 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1066 /* Records a definition of induction variable OP that is used outside of the
1067 loop. */
1069 static struct iv_use *
1070 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1072 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1075 /* Checks whether the condition *COND_P in STMT is interesting
1076 and if so, records it. */
1078 static void
1079 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1081 tree *op0_p;
1082 tree *op1_p;
1083 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1084 struct iv const_iv;
1085 tree zero = integer_zero_node;
1087 const_iv.step = NULL_TREE;
1089 if (integer_zerop (*cond_p)
1090 || integer_nonzerop (*cond_p))
1091 return;
1093 if (TREE_CODE (*cond_p) == SSA_NAME)
1095 op0_p = cond_p;
1096 op1_p = &zero;
1098 else
1100 op0_p = &TREE_OPERAND (*cond_p, 0);
1101 op1_p = &TREE_OPERAND (*cond_p, 1);
1104 if (TREE_CODE (*op0_p) == SSA_NAME)
1105 iv0 = get_iv (data, *op0_p);
1106 else
1107 iv0 = &const_iv;
1109 if (TREE_CODE (*op1_p) == SSA_NAME)
1110 iv1 = get_iv (data, *op1_p);
1111 else
1112 iv1 = &const_iv;
1114 if (/* When comparing with non-invariant value, we may not do any senseful
1115 induction variable elimination. */
1116 (!iv0 || !iv1)
1117 /* Eliminating condition based on two ivs would be nontrivial.
1118 ??? TODO -- it is not really important to handle this case. */
1119 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1121 find_interesting_uses_op (data, *op0_p);
1122 find_interesting_uses_op (data, *op1_p);
1123 return;
1126 if (zero_p (iv0->step) && zero_p (iv1->step))
1128 /* If both are invariants, this is a work for unswitching. */
1129 return;
1132 civ = xmalloc (sizeof (struct iv));
1133 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1134 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1137 /* Cumulates the steps of indices into DATA and replaces their values with the
1138 initial ones. Returns false when the value of the index cannot be determined.
1139 Callback for for_each_index. */
1141 struct ifs_ivopts_data
1143 struct ivopts_data *ivopts_data;
1144 tree stmt;
1145 tree *step_p;
1148 static bool
1149 idx_find_step (tree base, tree *idx, void *data)
1151 struct ifs_ivopts_data *dta = data;
1152 struct iv *iv;
1153 tree step, type, iv_type, iv_step;
1155 if (TREE_CODE (*idx) != SSA_NAME)
1156 return true;
1158 iv = get_iv (dta->ivopts_data, *idx);
1159 if (!iv)
1160 return false;
1162 *idx = iv->base;
1164 if (!iv->step)
1165 return true;
1167 iv_type = TREE_TYPE (iv->base);
1168 type = build_pointer_type (TREE_TYPE (base));
1169 if (TREE_CODE (base) == ARRAY_REF)
1170 step = array_ref_element_size (base);
1171 else
1172 /* The step for pointer arithmetics already is 1 byte. */
1173 step = build_int_cst (type, 1);
1175 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1176 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1177 type, iv->base, iv->step, dta->stmt);
1178 else
1179 iv_step = fold_convert (iv_type, iv->step);
1181 if (!iv_step)
1183 /* The index might wrap. */
1184 return false;
1187 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1189 if (!*dta->step_p)
1190 *dta->step_p = step;
1191 else
1192 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1194 return true;
1197 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1198 object is passed to it in DATA. */
1200 static bool
1201 idx_record_use (tree base ATTRIBUTE_UNUSED, tree *idx,
1202 void *data)
1204 find_interesting_uses_op (data, *idx);
1205 return true;
1208 /* Finds addresses in *OP_P inside STMT. */
1210 static void
1211 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1213 tree base = unshare_expr (*op_p), step = NULL;
1214 struct iv *civ;
1215 struct ifs_ivopts_data ifs_ivopts_data;
1217 /* Ignore bitfields for now. Not really something terribly complicated
1218 to handle. TODO. */
1219 if (TREE_CODE (base) == COMPONENT_REF
1220 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1221 goto fail;
1223 ifs_ivopts_data.ivopts_data = data;
1224 ifs_ivopts_data.stmt = stmt;
1225 ifs_ivopts_data.step_p = &step;
1226 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1227 || zero_p (step))
1228 goto fail;
1230 if (TREE_CODE (base) == INDIRECT_REF)
1231 base = TREE_OPERAND (base, 0);
1232 else
1233 base = build_addr (base);
1235 civ = alloc_iv (base, step);
1236 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1237 return;
1239 fail:
1240 for_each_index (op_p, idx_record_use, data);
1243 /* Finds and records invariants used in STMT. */
1245 static void
1246 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1248 use_optype uses = NULL;
1249 unsigned i, n;
1250 tree op;
1252 if (TREE_CODE (stmt) == PHI_NODE)
1253 n = PHI_NUM_ARGS (stmt);
1254 else
1256 get_stmt_operands (stmt);
1257 uses = STMT_USE_OPS (stmt);
1258 n = NUM_USES (uses);
1261 for (i = 0; i < n; i++)
1263 if (TREE_CODE (stmt) == PHI_NODE)
1264 op = PHI_ARG_DEF (stmt, i);
1265 else
1266 op = USE_OP (uses, i);
1268 record_invariant (data, op, false);
1272 /* Finds interesting uses of induction variables in the statement STMT. */
1274 static void
1275 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1277 struct iv *iv;
1278 tree op, lhs, rhs;
1279 use_optype uses = NULL;
1280 unsigned i, n;
1282 find_invariants_stmt (data, stmt);
1284 if (TREE_CODE (stmt) == COND_EXPR)
1286 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1287 return;
1290 if (TREE_CODE (stmt) == MODIFY_EXPR)
1292 lhs = TREE_OPERAND (stmt, 0);
1293 rhs = TREE_OPERAND (stmt, 1);
1295 if (TREE_CODE (lhs) == SSA_NAME)
1297 /* If the statement defines an induction variable, the uses are not
1298 interesting by themselves. */
1300 iv = get_iv (data, lhs);
1302 if (iv && !zero_p (iv->step))
1303 return;
1306 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1308 case '<':
1309 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1310 return;
1312 case 'r':
1313 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1314 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1315 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1316 return;
1318 default: ;
1321 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1323 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1324 find_interesting_uses_op (data, rhs);
1325 return;
1329 if (TREE_CODE (stmt) == PHI_NODE
1330 && bb_for_stmt (stmt) == data->current_loop->header)
1332 lhs = PHI_RESULT (stmt);
1333 iv = get_iv (data, lhs);
1335 if (iv && !zero_p (iv->step))
1336 return;
1339 if (TREE_CODE (stmt) == PHI_NODE)
1340 n = PHI_NUM_ARGS (stmt);
1341 else
1343 uses = STMT_USE_OPS (stmt);
1344 n = NUM_USES (uses);
1347 for (i = 0; i < n; i++)
1349 if (TREE_CODE (stmt) == PHI_NODE)
1350 op = PHI_ARG_DEF (stmt, i);
1351 else
1352 op = USE_OP (uses, i);
1354 if (TREE_CODE (op) != SSA_NAME)
1355 continue;
1357 iv = get_iv (data, op);
1358 if (!iv)
1359 continue;
1361 find_interesting_uses_op (data, op);
1365 /* Finds interesting uses of induction variables outside of loops
1366 on loop exit edge EXIT. */
1368 static void
1369 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1371 tree phi, def;
1373 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1375 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1376 find_interesting_uses_outer (data, def);
1380 /* Finds uses of the induction variables that are interesting. */
1382 static void
1383 find_interesting_uses (struct ivopts_data *data)
1385 basic_block bb;
1386 block_stmt_iterator bsi;
1387 tree phi;
1388 basic_block *body = get_loop_body (data->current_loop);
1389 unsigned i;
1390 struct version_info *info;
1391 edge e;
1393 if (dump_file && (dump_flags & TDF_DETAILS))
1394 fprintf (dump_file, "Uses:\n\n");
1396 for (i = 0; i < data->current_loop->num_nodes; i++)
1398 edge_iterator ei;
1399 bb = body[i];
1401 FOR_EACH_EDGE (e, ei, bb->succs)
1403 if (e->dest != EXIT_BLOCK_PTR
1404 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1405 find_interesting_uses_outside (data, e);
1408 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1409 find_interesting_uses_stmt (data, phi);
1410 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1411 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1414 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "\n");
1418 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1420 info = ver_info (data, i);
1421 if (info->inv_id)
1423 fprintf (dump_file, " ");
1424 print_generic_expr (dump_file, info->name, TDF_SLIM);
1425 fprintf (dump_file, " is invariant (%d)%s\n",
1426 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1430 fprintf (dump_file, "\n");
1433 free (body);
1436 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1437 position to POS. If USE is not NULL, the candidate is set as related to
1438 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1439 replacement of the final value of the iv by a direct computation. */
1441 static struct iv_cand *
1442 add_candidate_1 (struct ivopts_data *data,
1443 tree base, tree step, bool important, enum iv_position pos,
1444 struct iv_use *use, tree incremented_at)
1446 unsigned i;
1447 struct iv_cand *cand = NULL;
1448 tree type;
1450 if (base)
1452 type = TREE_TYPE (base);
1453 if (!TYPE_UNSIGNED (type))
1455 type = unsigned_type_for (type);
1456 base = fold_convert (type, base);
1457 if (step)
1458 step = fold_convert (type, step);
1462 for (i = 0; i < n_iv_cands (data); i++)
1464 cand = iv_cand (data, i);
1466 if (cand->pos != pos)
1467 continue;
1469 if (cand->incremented_at != incremented_at)
1470 continue;
1472 if (!cand->iv)
1474 if (!base && !step)
1475 break;
1477 continue;
1480 if (!base && !step)
1481 continue;
1483 if (!operand_equal_p (base, cand->iv->base, 0))
1484 continue;
1486 if (zero_p (cand->iv->step))
1488 if (zero_p (step))
1489 break;
1491 else
1493 if (step && operand_equal_p (step, cand->iv->step, 0))
1494 break;
1498 if (i == n_iv_cands (data))
1500 cand = xcalloc (1, sizeof (struct iv_cand));
1501 cand->id = i;
1503 if (!base && !step)
1504 cand->iv = NULL;
1505 else
1506 cand->iv = alloc_iv (base, step);
1508 cand->pos = pos;
1509 if (pos != IP_ORIGINAL && cand->iv)
1511 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1512 cand->var_after = cand->var_before;
1514 cand->important = important;
1515 cand->incremented_at = incremented_at;
1516 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1518 if (dump_file && (dump_flags & TDF_DETAILS))
1519 dump_cand (dump_file, cand);
1522 if (important && !cand->important)
1524 cand->important = true;
1525 if (dump_file && (dump_flags & TDF_DETAILS))
1526 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1529 if (use)
1531 bitmap_set_bit (use->related_cands, i);
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1533 fprintf (dump_file, "Candidate %d is related to use %d\n",
1534 cand->id, use->id);
1537 return cand;
1540 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1541 position to POS. If USE is not NULL, the candidate is set as related to
1542 it. The candidate computation is scheduled on all available positions. */
1544 static void
1545 add_candidate (struct ivopts_data *data,
1546 tree base, tree step, bool important, struct iv_use *use)
1548 if (ip_normal_pos (data->current_loop))
1549 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1550 if (ip_end_pos (data->current_loop))
1551 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1554 /* Adds standard iv candidates. */
1556 static void
1557 add_standard_iv_candidates (struct ivopts_data *data)
1559 /* Add 0 + 1 * iteration candidate. */
1560 add_candidate (data,
1561 build_int_cst (unsigned_intSI_type_node, 0),
1562 build_int_cst (unsigned_intSI_type_node, 1),
1563 true, NULL);
1565 /* The same for a long type if it is still fast enought. */
1566 if (BITS_PER_WORD > 32)
1567 add_candidate (data,
1568 build_int_cst (unsigned_intDI_type_node, 0),
1569 build_int_cst (unsigned_intDI_type_node, 1),
1570 true, NULL);
1574 /* Adds candidates bases on the old induction variable IV. */
1576 static void
1577 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1579 tree phi, def;
1580 struct iv_cand *cand;
1582 add_candidate (data, iv->base, iv->step, true, NULL);
1584 /* The same, but with initial value zero. */
1585 add_candidate (data,
1586 build_int_cst (TREE_TYPE (iv->base), 0),
1587 iv->step, true, NULL);
1589 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1590 if (TREE_CODE (phi) == PHI_NODE)
1592 /* Additionally record the possibility of leaving the original iv
1593 untouched. */
1594 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1595 cand = add_candidate_1 (data,
1596 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1597 SSA_NAME_DEF_STMT (def));
1598 cand->var_before = iv->ssa_name;
1599 cand->var_after = def;
1603 /* Adds candidates based on the old induction variables. */
1605 static void
1606 add_old_ivs_candidates (struct ivopts_data *data)
1608 unsigned i;
1609 struct iv *iv;
1611 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1613 iv = ver_info (data, i)->iv;
1614 if (iv && iv->biv_p && !zero_p (iv->step))
1615 add_old_iv_candidates (data, iv);
1619 /* Adds candidates based on the value of the induction variable IV and USE. */
1621 static void
1622 add_iv_value_candidates (struct ivopts_data *data,
1623 struct iv *iv, struct iv_use *use)
1625 add_candidate (data, iv->base, iv->step, false, use);
1627 /* The same, but with initial value zero. */
1628 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1629 iv->step, false, use);
1632 /* Adds candidates based on the address IV and USE. */
1634 static void
1635 add_address_candidates (struct ivopts_data *data,
1636 struct iv *iv, struct iv_use *use)
1638 tree base, abase, tmp, *act;
1640 /* First, the trivial choices. */
1641 add_iv_value_candidates (data, iv, use);
1643 /* Second, try removing the COMPONENT_REFs. */
1644 if (TREE_CODE (iv->base) == ADDR_EXPR)
1646 base = TREE_OPERAND (iv->base, 0);
1647 while (TREE_CODE (base) == COMPONENT_REF
1648 || (TREE_CODE (base) == ARRAY_REF
1649 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1650 base = TREE_OPERAND (base, 0);
1652 if (base != TREE_OPERAND (iv->base, 0))
1654 if (TREE_CODE (base) == INDIRECT_REF)
1655 base = TREE_OPERAND (base, 0);
1656 else
1657 base = build_addr (base);
1658 add_candidate (data, base, iv->step, false, use);
1662 /* Third, try removing the constant offset. */
1663 abase = iv->base;
1664 while (TREE_CODE (abase) == PLUS_EXPR
1665 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1666 abase = TREE_OPERAND (abase, 0);
1667 /* We found the offset, so make the copy of the non-shared part and
1668 remove it. */
1669 if (TREE_CODE (abase) == PLUS_EXPR)
1671 tmp = iv->base;
1672 act = &base;
1674 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1676 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1677 NULL_TREE, TREE_OPERAND (tmp, 1));
1678 act = &TREE_OPERAND (*act, 0);
1680 *act = TREE_OPERAND (tmp, 0);
1682 add_candidate (data, base, iv->step, false, use);
1686 /* Possibly adds pseudocandidate for replacing the final value of USE by
1687 a direct computation. */
1689 static void
1690 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1692 struct tree_niter_desc *niter;
1693 struct loop *loop = data->current_loop;
1695 /* We must know where we exit the loop and how many times does it roll. */
1696 if (!single_dom_exit (loop))
1697 return;
1699 niter = &loop_data (loop)->niter;
1700 if (!niter->niter
1701 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1702 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1703 return;
1705 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1708 /* Adds candidates based on the uses. */
1710 static void
1711 add_derived_ivs_candidates (struct ivopts_data *data)
1713 unsigned i;
1715 for (i = 0; i < n_iv_uses (data); i++)
1717 struct iv_use *use = iv_use (data, i);
1719 if (!use)
1720 continue;
1722 switch (use->type)
1724 case USE_NONLINEAR_EXPR:
1725 case USE_COMPARE:
1726 /* Just add the ivs based on the value of the iv used here. */
1727 add_iv_value_candidates (data, use->iv, use);
1728 break;
1730 case USE_OUTER:
1731 add_iv_value_candidates (data, use->iv, use);
1733 /* Additionally, add the pseudocandidate for the possibility to
1734 replace the final value by a direct computation. */
1735 add_iv_outer_candidates (data, use);
1736 break;
1738 case USE_ADDRESS:
1739 add_address_candidates (data, use->iv, use);
1740 break;
1742 default:
1743 gcc_unreachable ();
1748 /* Finds the candidates for the induction variables. */
1750 static void
1751 find_iv_candidates (struct ivopts_data *data)
1753 /* Add commonly used ivs. */
1754 add_standard_iv_candidates (data);
1756 /* Add old induction variables. */
1757 add_old_ivs_candidates (data);
1759 /* Add induction variables derived from uses. */
1760 add_derived_ivs_candidates (data);
1763 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1764 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1765 we allocate a simple list to every use. */
1767 static void
1768 alloc_use_cost_map (struct ivopts_data *data)
1770 unsigned i, n_imp = 0, size, j;
1772 if (!data->consider_all_candidates)
1774 for (i = 0; i < n_iv_cands (data); i++)
1776 struct iv_cand *cand = iv_cand (data, i);
1777 if (cand->important)
1778 n_imp++;
1782 for (i = 0; i < n_iv_uses (data); i++)
1784 struct iv_use *use = iv_use (data, i);
1786 if (data->consider_all_candidates)
1788 size = n_iv_cands (data);
1789 use->n_map_members = size;
1791 else
1793 size = n_imp;
1794 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1795 use->n_map_members = 0;
1798 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1802 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1803 on invariants DEPENDS_ON. */
1805 static void
1806 set_use_iv_cost (struct ivopts_data *data,
1807 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1808 bitmap depends_on)
1810 if (cost == INFTY
1811 && depends_on)
1813 BITMAP_XFREE (depends_on);
1814 depends_on = NULL;
1817 if (data->consider_all_candidates)
1819 use->cost_map[cand->id].cand = cand;
1820 use->cost_map[cand->id].cost = cost;
1821 use->cost_map[cand->id].depends_on = depends_on;
1822 return;
1825 if (cost == INFTY)
1826 return;
1828 use->cost_map[use->n_map_members].cand = cand;
1829 use->cost_map[use->n_map_members].cost = cost;
1830 use->cost_map[use->n_map_members].depends_on = depends_on;
1831 use->n_map_members++;
1834 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1835 DEPENDS_ON. */
1837 static unsigned
1838 get_use_iv_cost (struct ivopts_data *data,
1839 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1841 unsigned i;
1843 if (!cand)
1844 return INFTY;
1846 if (data->consider_all_candidates)
1847 i = cand->id;
1848 else
1850 for (i = 0; i < use->n_map_members; i++)
1851 if (use->cost_map[i].cand == cand)
1852 break;
1854 if (i == use->n_map_members)
1855 return INFTY;
1858 if (depends_on)
1859 *depends_on = use->cost_map[i].depends_on;
1860 return use->cost_map[i].cost;
1863 /* Returns estimate on cost of computing SEQ. */
1865 static unsigned
1866 seq_cost (rtx seq)
1868 unsigned cost = 0;
1869 rtx set;
1871 for (; seq; seq = NEXT_INSN (seq))
1873 set = single_set (seq);
1874 if (set)
1875 cost += rtx_cost (set, SET);
1876 else
1877 cost++;
1880 return cost;
1883 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1884 static rtx
1885 produce_memory_decl_rtl (tree obj, int *regno)
1887 rtx x;
1888 if (!obj)
1889 abort ();
1890 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1892 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1893 x = gen_rtx_SYMBOL_REF (Pmode, name);
1895 else
1896 x = gen_raw_REG (Pmode, (*regno)++);
1898 return gen_rtx_MEM (DECL_MODE (obj), x);
1901 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1902 walk_tree. DATA contains the actual fake register number. */
1904 static tree
1905 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1907 tree obj = NULL_TREE;
1908 rtx x = NULL_RTX;
1909 int *regno = data;
1911 switch (TREE_CODE (*expr_p))
1913 case ADDR_EXPR:
1914 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1915 (handled_component_p (*expr_p)
1916 || TREE_CODE (*expr_p) == REALPART_EXPR
1917 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1918 expr_p = &TREE_OPERAND (*expr_p, 0));
1919 obj = *expr_p;
1920 if (DECL_P (obj))
1921 x = produce_memory_decl_rtl (obj, regno);
1922 break;
1924 case SSA_NAME:
1925 *ws = 0;
1926 obj = SSA_NAME_VAR (*expr_p);
1927 if (!DECL_RTL_SET_P (obj))
1928 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1929 break;
1931 case VAR_DECL:
1932 case PARM_DECL:
1933 case RESULT_DECL:
1934 *ws = 0;
1935 obj = *expr_p;
1937 if (DECL_RTL_SET_P (obj))
1938 break;
1940 if (DECL_MODE (obj) == BLKmode)
1941 x = produce_memory_decl_rtl (obj, regno);
1942 else
1943 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1945 break;
1947 default:
1948 break;
1951 if (x)
1953 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
1954 SET_DECL_RTL (obj, x);
1957 return NULL_TREE;
1960 /* Determines cost of the computation of EXPR. */
1962 static unsigned
1963 computation_cost (tree expr)
1965 rtx seq, rslt;
1966 tree type = TREE_TYPE (expr);
1967 unsigned cost;
1968 int regno = 0;
1970 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
1971 start_sequence ();
1972 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
1973 seq = get_insns ();
1974 end_sequence ();
1976 cost = seq_cost (seq);
1977 if (GET_CODE (rslt) == MEM)
1978 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
1980 return cost;
1983 /* Returns variable containing the value of candidate CAND at statement AT. */
1985 static tree
1986 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
1988 if (stmt_after_increment (loop, cand, stmt))
1989 return cand->var_after;
1990 else
1991 return cand->var_before;
1994 /* Determines the expression by that USE is expressed from induction variable
1995 CAND at statement AT in LOOP. */
1997 static tree
1998 get_computation_at (struct loop *loop,
1999 struct iv_use *use, struct iv_cand *cand, tree at)
2001 tree ubase = unsave_expr_now (use->iv->base);
2002 tree ustep = unsave_expr_now (use->iv->step);
2003 tree cbase = unsave_expr_now (cand->iv->base);
2004 tree cstep = unsave_expr_now (cand->iv->step);
2005 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2006 tree uutype;
2007 tree expr, delta;
2008 tree ratio;
2009 unsigned HOST_WIDE_INT ustepi, cstepi;
2010 HOST_WIDE_INT ratioi;
2012 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2014 /* We do not have a precision to express the values of use. */
2015 return NULL_TREE;
2018 expr = var_at_stmt (loop, cand, at);
2020 if (TREE_TYPE (expr) != ctype)
2022 /* This may happen with the original ivs. */
2023 expr = fold_convert (ctype, expr);
2026 if (TYPE_UNSIGNED (utype))
2027 uutype = utype;
2028 else
2030 uutype = unsigned_type_for (utype);
2031 ubase = fold_convert (uutype, ubase);
2032 ustep = fold_convert (uutype, ustep);
2035 if (uutype != ctype)
2037 expr = fold_convert (uutype, expr);
2038 cbase = fold_convert (uutype, cbase);
2039 cstep = fold_convert (uutype, cstep);
2042 if (!cst_and_fits_in_hwi (cstep)
2043 || !cst_and_fits_in_hwi (ustep))
2044 return NULL_TREE;
2046 ustepi = int_cst_value (ustep);
2047 cstepi = int_cst_value (cstep);
2049 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2051 /* TODO maybe consider case when ustep divides cstep and the ratio is
2052 a power of 2 (so that the division is fast to execute)? We would
2053 need to be much more careful with overflows etc. then. */
2054 return NULL_TREE;
2057 /* We may need to shift the value if we are after the increment. */
2058 if (stmt_after_increment (loop, cand, at))
2059 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2061 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2062 or |ratio| == 1, it is better to handle this like
2064 ubase - ratio * cbase + ratio * var. */
2066 if (ratioi == 1)
2068 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2069 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2071 else if (ratioi == -1)
2073 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2074 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2076 else if (TREE_CODE (cbase) == INTEGER_CST)
2078 ratio = build_int_cst_type (uutype, ratioi);
2079 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2080 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2081 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2082 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2084 else
2086 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2087 ratio = build_int_cst_type (uutype, ratioi);
2088 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2089 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2092 return fold_convert (utype, expr);
2095 /* Determines the expression by that USE is expressed from induction variable
2096 CAND in LOOP. */
2098 static tree
2099 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2101 return get_computation_at (loop, use, cand, use->stmt);
2104 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2106 static void
2107 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2109 tree op0, op1;
2110 enum tree_code code;
2112 while (1)
2114 if (cst_and_fits_in_hwi (*expr))
2116 *offset += int_cst_value (*expr);
2117 *expr = integer_zero_node;
2118 return;
2121 code = TREE_CODE (*expr);
2123 if (code != PLUS_EXPR && code != MINUS_EXPR)
2124 return;
2126 op0 = TREE_OPERAND (*expr, 0);
2127 op1 = TREE_OPERAND (*expr, 1);
2129 if (cst_and_fits_in_hwi (op1))
2131 if (code == PLUS_EXPR)
2132 *offset += int_cst_value (op1);
2133 else
2134 *offset -= int_cst_value (op1);
2136 *expr = op0;
2137 continue;
2140 if (code != PLUS_EXPR)
2141 return;
2143 if (!cst_and_fits_in_hwi (op0))
2144 return;
2146 *offset += int_cst_value (op0);
2147 *expr = op1;
2151 /* Returns cost of addition in MODE. */
2153 static unsigned
2154 add_cost (enum machine_mode mode)
2156 static unsigned costs[NUM_MACHINE_MODES];
2157 rtx seq;
2158 unsigned cost;
2160 if (costs[mode])
2161 return costs[mode];
2163 start_sequence ();
2164 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2165 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2166 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2167 NULL_RTX);
2168 seq = get_insns ();
2169 end_sequence ();
2171 cost = seq_cost (seq);
2172 if (!cost)
2173 cost = 1;
2175 costs[mode] = cost;
2177 if (dump_file && (dump_flags & TDF_DETAILS))
2178 fprintf (dump_file, "Addition in %s costs %d\n",
2179 GET_MODE_NAME (mode), cost);
2180 return cost;
2183 /* Entry in a hashtable of already known costs for multiplication. */
2184 struct mbc_entry
2186 HOST_WIDE_INT cst; /* The constant to multiply by. */
2187 enum machine_mode mode; /* In mode. */
2188 unsigned cost; /* The cost. */
2191 /* Counts hash value for the ENTRY. */
2193 static hashval_t
2194 mbc_entry_hash (const void *entry)
2196 const struct mbc_entry *e = entry;
2198 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2201 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2203 static int
2204 mbc_entry_eq (const void *entry1, const void *entry2)
2206 const struct mbc_entry *e1 = entry1;
2207 const struct mbc_entry *e2 = entry2;
2209 return (e1->mode == e2->mode
2210 && e1->cst == e2->cst);
2213 /* Returns cost of multiplication by constant CST in MODE. */
2215 static unsigned
2216 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2218 static htab_t costs;
2219 struct mbc_entry **cached, act;
2220 rtx seq;
2221 unsigned cost;
2223 if (!costs)
2224 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2226 act.mode = mode;
2227 act.cst = cst;
2228 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2229 if (*cached)
2230 return (*cached)->cost;
2232 *cached = xmalloc (sizeof (struct mbc_entry));
2233 (*cached)->mode = mode;
2234 (*cached)->cst = cst;
2236 start_sequence ();
2237 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2238 NULL_RTX, 0);
2239 seq = get_insns ();
2240 end_sequence ();
2242 cost = seq_cost (seq);
2244 if (dump_file && (dump_flags & TDF_DETAILS))
2245 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2246 (int) cst, GET_MODE_NAME (mode), cost);
2248 (*cached)->cost = cost;
2250 return cost;
2253 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2254 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2255 variable is omitted. The created memory accesses MODE.
2257 TODO -- there must be some better way. This all is quite crude. */
2259 static unsigned
2260 get_address_cost (bool symbol_present, bool var_present,
2261 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2263 #define MAX_RATIO 128
2264 static sbitmap valid_mult;
2265 static HOST_WIDE_INT rat, off;
2266 static HOST_WIDE_INT min_offset, max_offset;
2267 static unsigned costs[2][2][2][2];
2268 unsigned cost, acost;
2269 rtx seq, addr, base;
2270 bool offset_p, ratio_p;
2271 rtx reg1;
2272 HOST_WIDE_INT s_offset;
2273 unsigned HOST_WIDE_INT mask;
2274 unsigned bits;
2276 if (!valid_mult)
2278 HOST_WIDE_INT i;
2280 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2282 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2283 for (i = 1; i <= 1 << 20; i <<= 1)
2285 XEXP (addr, 1) = GEN_INT (i);
2286 if (!memory_address_p (Pmode, addr))
2287 break;
2289 max_offset = i >> 1;
2290 off = max_offset;
2292 for (i = 1; i <= 1 << 20; i <<= 1)
2294 XEXP (addr, 1) = GEN_INT (-i);
2295 if (!memory_address_p (Pmode, addr))
2296 break;
2298 min_offset = -(i >> 1);
2300 if (dump_file && (dump_flags & TDF_DETAILS))
2302 fprintf (dump_file, "get_address_cost:\n");
2303 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2304 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2307 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2308 sbitmap_zero (valid_mult);
2309 rat = 1;
2310 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2311 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2313 XEXP (addr, 1) = GEN_INT (i);
2314 if (memory_address_p (Pmode, addr))
2316 SET_BIT (valid_mult, i + MAX_RATIO);
2317 rat = i;
2321 if (dump_file && (dump_flags & TDF_DETAILS))
2323 fprintf (dump_file, " allowed multipliers:");
2324 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2325 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2326 fprintf (dump_file, " %d", (int) i);
2327 fprintf (dump_file, "\n");
2328 fprintf (dump_file, "\n");
2332 bits = GET_MODE_BITSIZE (Pmode);
2333 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2334 offset &= mask;
2335 if ((offset >> (bits - 1) & 1))
2336 offset |= ~mask;
2337 s_offset = offset;
2339 cost = 0;
2340 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2341 ratio_p = (ratio != 1
2342 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2343 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2345 if (ratio != 1 && !ratio_p)
2346 cost += multiply_by_cost (ratio, Pmode);
2348 if (s_offset && !offset_p && !symbol_present)
2350 cost += add_cost (Pmode);
2351 var_present = true;
2354 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2355 if (!acost)
2357 acost = 0;
2359 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2360 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2361 if (ratio_p)
2362 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2364 if (symbol_present)
2366 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2367 if (offset_p)
2368 base = gen_rtx_fmt_e (CONST, Pmode,
2369 gen_rtx_fmt_ee (PLUS, Pmode,
2370 base,
2371 GEN_INT (off)));
2372 if (var_present)
2373 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2376 else if (var_present)
2378 base = reg1;
2379 if (offset_p)
2380 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2382 else if (offset_p)
2383 base = GEN_INT (off);
2384 else
2385 base = NULL_RTX;
2387 if (base)
2388 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2390 start_sequence ();
2391 addr = memory_address (Pmode, addr);
2392 seq = get_insns ();
2393 end_sequence ();
2395 acost = seq_cost (seq);
2396 acost += address_cost (addr, Pmode);
2398 if (!acost)
2399 acost = 1;
2400 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2403 return cost + acost;
2406 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2407 the bitmap to that we should store it. */
2409 static struct ivopts_data *fd_ivopts_data;
2410 static tree
2411 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2413 bitmap *depends_on = data;
2414 struct version_info *info;
2416 if (TREE_CODE (*expr_p) != SSA_NAME)
2417 return NULL_TREE;
2418 info = name_info (fd_ivopts_data, *expr_p);
2420 if (!info->inv_id || info->has_nonlin_use)
2421 return NULL_TREE;
2423 if (!*depends_on)
2424 *depends_on = BITMAP_XMALLOC ();
2425 bitmap_set_bit (*depends_on, info->inv_id);
2427 return NULL_TREE;
2430 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2431 invariants the computation depends on. */
2433 static unsigned
2434 force_var_cost (struct ivopts_data *data,
2435 tree expr, bitmap *depends_on)
2437 static bool costs_initialized = false;
2438 static unsigned integer_cost;
2439 static unsigned symbol_cost;
2440 static unsigned address_cost;
2442 if (!costs_initialized)
2444 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2445 rtx x = gen_rtx_MEM (DECL_MODE (var),
2446 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2447 tree addr;
2448 tree type = build_pointer_type (integer_type_node);
2450 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2451 2000));
2453 SET_DECL_RTL (var, x);
2454 TREE_STATIC (var) = 1;
2455 addr = build1 (ADDR_EXPR, type, var);
2456 symbol_cost = computation_cost (addr) + 1;
2458 address_cost
2459 = computation_cost (build2 (PLUS_EXPR, type,
2460 addr,
2461 build_int_cst_type (type, 2000))) + 1;
2462 if (dump_file && (dump_flags & TDF_DETAILS))
2464 fprintf (dump_file, "force_var_cost:\n");
2465 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2466 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2467 fprintf (dump_file, " address %d\n", (int) address_cost);
2468 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2469 fprintf (dump_file, "\n");
2472 costs_initialized = true;
2475 if (depends_on)
2477 fd_ivopts_data = data;
2478 walk_tree (&expr, find_depends, depends_on, NULL);
2481 if (SSA_VAR_P (expr))
2482 return 0;
2484 if (TREE_INVARIANT (expr))
2486 if (TREE_CODE (expr) == INTEGER_CST)
2487 return integer_cost;
2489 if (TREE_CODE (expr) == ADDR_EXPR)
2491 tree obj = TREE_OPERAND (expr, 0);
2493 if (TREE_CODE (obj) == VAR_DECL
2494 || TREE_CODE (obj) == PARM_DECL
2495 || TREE_CODE (obj) == RESULT_DECL)
2496 return symbol_cost;
2499 return address_cost;
2502 /* Just an arbitrary value, FIXME. */
2503 return target_spill_cost;
2506 /* Peels a single layer of ADDR. If DIFF is not NULL, do it only if the
2507 offset is constant and add the offset to DIFF. */
2509 static tree
2510 peel_address (tree addr, unsigned HOST_WIDE_INT *diff)
2512 tree off, size;
2513 HOST_WIDE_INT bit_offset;
2515 switch (TREE_CODE (addr))
2517 case SSA_NAME:
2518 case INDIRECT_REF:
2519 case BIT_FIELD_REF:
2520 case VAR_DECL:
2521 case PARM_DECL:
2522 case RESULT_DECL:
2523 case STRING_CST:
2524 case REALPART_EXPR:
2525 case IMAGPART_EXPR:
2526 return NULL_TREE;
2528 case COMPONENT_REF:
2529 off = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (addr, 1));
2530 bit_offset = TREE_INT_CST_LOW (off);
2532 gcc_assert ((bit_offset % BITS_PER_UNIT) == 0);
2534 if (diff)
2535 *diff += bit_offset / BITS_PER_UNIT;
2537 return TREE_OPERAND (addr, 0);
2539 case VIEW_CONVERT_EXPR:
2540 return TREE_OPERAND (addr, 0);
2542 case ARRAY_REF:
2543 off = TREE_OPERAND (addr, 1);
2545 if (diff)
2547 if (!cst_and_fits_in_hwi (off))
2548 return NULL_TREE;
2550 size = TYPE_SIZE_UNIT (TREE_TYPE (addr));
2551 if (!cst_and_fits_in_hwi (size))
2552 return NULL_TREE;
2554 *diff += TREE_INT_CST_LOW (off) * TREE_INT_CST_LOW (size);
2557 return TREE_OPERAND (addr, 0);
2559 default:
2560 gcc_unreachable ();
2564 /* Checks whether E1 and E2 have constant difference, and if they do,
2565 store it in *DIFF. */
2567 static bool
2568 ptr_difference_const (tree e1, tree e2, unsigned HOST_WIDE_INT *diff)
2570 int d1 = 0, d2 = 0;
2571 tree x;
2572 unsigned HOST_WIDE_INT delta1 = 0, delta2 = 0;
2574 /* Find depths of E1 and E2. */
2575 for (x = e1; x; x = peel_address (x, NULL))
2576 d1++;
2577 for (x = e2; x; x = peel_address (x, NULL))
2578 d2++;
2580 for (; e1 && d1 > d2; e1 = peel_address (e1, &delta1))
2581 d1--;
2582 for (; e2 && d2 > d1; e2 = peel_address (e2, &delta2))
2583 d2--;
2585 while (e1 && e2 && !operand_equal_p (e1, e2, 0))
2587 e1 = peel_address (e1, &delta1);
2588 e2 = peel_address (e2, &delta1);
2591 if (!e1 || !e2)
2592 return false;
2594 *diff = delta1 - delta2;
2595 return true;
2598 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2599 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2600 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2601 invariants the computation depends on. */
2603 static unsigned
2604 split_address_cost (struct ivopts_data *data,
2605 tree addr, bool *symbol_present, bool *var_present,
2606 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2608 tree core = addr;
2610 while (core
2611 && TREE_CODE (core) != VAR_DECL)
2612 core = peel_address (core, offset);
2614 if (!core)
2616 *symbol_present = false;
2617 *var_present = true;
2618 fd_ivopts_data = data;
2619 walk_tree (&addr, find_depends, depends_on, NULL);
2620 return target_spill_cost;
2623 if (TREE_STATIC (core)
2624 || DECL_EXTERNAL (core))
2626 *symbol_present = true;
2627 *var_present = false;
2628 return 0;
2631 *symbol_present = false;
2632 *var_present = true;
2633 return 0;
2636 /* Estimates cost of expressing difference of addresses E1 - E2 as
2637 var + symbol + offset. The value of offset is added to OFFSET,
2638 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2639 part is missing. DEPENDS_ON is a set of the invariants the computation
2640 depends on. */
2642 static unsigned
2643 ptr_difference_cost (struct ivopts_data *data,
2644 tree e1, tree e2, bool *symbol_present, bool *var_present,
2645 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2647 unsigned HOST_WIDE_INT diff = 0;
2648 unsigned cost;
2650 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2652 if (TREE_CODE (e2) == ADDR_EXPR
2653 && ptr_difference_const (TREE_OPERAND (e1, 0),
2654 TREE_OPERAND (e2, 0), &diff))
2656 *offset += diff;
2657 *symbol_present = false;
2658 *var_present = false;
2659 return 0;
2662 if (e2 == integer_zero_node)
2663 return split_address_cost (data, TREE_OPERAND (e1, 0),
2664 symbol_present, var_present, offset, depends_on);
2666 *symbol_present = false;
2667 *var_present = true;
2669 cost = force_var_cost (data, e1, depends_on);
2670 cost += force_var_cost (data, e2, depends_on);
2671 cost += add_cost (Pmode);
2673 return cost;
2676 /* Estimates cost of expressing difference E1 - E2 as
2677 var + symbol + offset. The value of offset is added to OFFSET,
2678 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2679 part is missing. DEPENDS_ON is a set of the invariants the computation
2680 depends on. */
2682 static unsigned
2683 difference_cost (struct ivopts_data *data,
2684 tree e1, tree e2, bool *symbol_present, bool *var_present,
2685 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2687 unsigned cost;
2688 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2690 strip_offset (&e1, offset);
2691 *offset = -*offset;
2692 strip_offset (&e2, offset);
2693 *offset = -*offset;
2695 if (TREE_CODE (e1) == ADDR_EXPR)
2696 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2697 depends_on);
2698 *symbol_present = false;
2700 if (operand_equal_p (e1, e2, 0))
2702 *var_present = false;
2703 return 0;
2705 *var_present = true;
2706 if (zero_p (e2))
2707 return force_var_cost (data, e1, depends_on);
2709 if (zero_p (e1))
2711 cost = force_var_cost (data, e2, depends_on);
2712 cost += multiply_by_cost (-1, mode);
2714 return cost;
2717 cost = force_var_cost (data, e1, depends_on);
2718 cost += force_var_cost (data, e2, depends_on);
2719 cost += add_cost (mode);
2721 return cost;
2724 /* Determines the cost of the computation by that USE is expressed
2725 from induction variable CAND. If ADDRESS_P is true, we just need
2726 to create an address from it, otherwise we want to get it into
2727 register. A set of invariants we depend on is stored in
2728 DEPENDS_ON. AT is the statement at that the value is computed. */
2730 static unsigned
2731 get_computation_cost_at (struct ivopts_data *data,
2732 struct iv_use *use, struct iv_cand *cand,
2733 bool address_p, bitmap *depends_on, tree at)
2735 tree ubase = use->iv->base, ustep = use->iv->step;
2736 tree cbase, cstep;
2737 tree utype = TREE_TYPE (ubase), ctype;
2738 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2739 HOST_WIDE_INT ratio, aratio;
2740 bool var_present, symbol_present;
2741 unsigned cost = 0, n_sums;
2743 *depends_on = NULL;
2745 /* Only consider real candidates. */
2746 if (!cand->iv)
2747 return INFTY;
2749 cbase = cand->iv->base;
2750 cstep = cand->iv->step;
2751 ctype = TREE_TYPE (cbase);
2753 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2755 /* We do not have a precision to express the values of use. */
2756 return INFTY;
2759 if (!cst_and_fits_in_hwi (ustep)
2760 || !cst_and_fits_in_hwi (cstep))
2761 return INFTY;
2763 if (TREE_CODE (ubase) == INTEGER_CST
2764 && !cst_and_fits_in_hwi (ubase))
2765 goto fallback;
2767 if (TREE_CODE (cbase) == INTEGER_CST
2768 && !cst_and_fits_in_hwi (cbase))
2769 goto fallback;
2771 ustepi = int_cst_value (ustep);
2772 cstepi = int_cst_value (cstep);
2774 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2776 /* TODO -- add direct handling of this case. */
2777 goto fallback;
2780 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2781 return INFTY;
2783 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2784 or ratio == 1, it is better to handle this like
2786 ubase - ratio * cbase + ratio * var
2788 (also holds in the case ratio == -1, TODO. */
2790 if (TREE_CODE (cbase) == INTEGER_CST)
2792 offset = - ratio * int_cst_value (cbase);
2793 cost += difference_cost (data,
2794 ubase, integer_zero_node,
2795 &symbol_present, &var_present, &offset,
2796 depends_on);
2798 else if (ratio == 1)
2800 cost += difference_cost (data,
2801 ubase, cbase,
2802 &symbol_present, &var_present, &offset,
2803 depends_on);
2805 else
2807 cost += force_var_cost (data, cbase, depends_on);
2808 cost += add_cost (TYPE_MODE (ctype));
2809 cost += difference_cost (data,
2810 ubase, integer_zero_node,
2811 &symbol_present, &var_present, &offset,
2812 depends_on);
2815 /* If we are after the increment, the value of the candidate is higher by
2816 one iteration. */
2817 if (stmt_after_increment (data->current_loop, cand, at))
2818 offset -= ratio * cstepi;
2820 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2821 (symbol/var/const parts may be omitted). If we are looking for an address,
2822 find the cost of addressing this. */
2823 if (address_p)
2824 return get_address_cost (symbol_present, var_present, offset, ratio);
2826 /* Otherwise estimate the costs for computing the expression. */
2827 aratio = ratio > 0 ? ratio : -ratio;
2828 if (!symbol_present && !var_present && !offset)
2830 if (ratio != 1)
2831 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2833 return cost;
2836 if (aratio != 1)
2837 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2839 n_sums = 1;
2840 if (var_present
2841 /* Symbol + offset should be compile-time computable. */
2842 && (symbol_present || offset))
2843 n_sums++;
2845 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2847 fallback:
2849 /* Just get the expression, expand it and measure the cost. */
2850 tree comp = get_computation_at (data->current_loop, use, cand, at);
2852 if (!comp)
2853 return INFTY;
2855 if (address_p)
2856 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2858 return computation_cost (comp);
2862 /* Determines the cost of the computation by that USE is expressed
2863 from induction variable CAND. If ADDRESS_P is true, we just need
2864 to create an address from it, otherwise we want to get it into
2865 register. A set of invariants we depend on is stored in
2866 DEPENDS_ON. */
2868 static unsigned
2869 get_computation_cost (struct ivopts_data *data,
2870 struct iv_use *use, struct iv_cand *cand,
2871 bool address_p, bitmap *depends_on)
2873 return get_computation_cost_at (data,
2874 use, cand, address_p, depends_on, use->stmt);
2877 /* Determines cost of basing replacement of USE on CAND in a generic
2878 expression. */
2880 static void
2881 determine_use_iv_cost_generic (struct ivopts_data *data,
2882 struct iv_use *use, struct iv_cand *cand)
2884 bitmap depends_on;
2885 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2887 set_use_iv_cost (data, use, cand, cost, depends_on);
2890 /* Determines cost of basing replacement of USE on CAND in an address. */
2892 static void
2893 determine_use_iv_cost_address (struct ivopts_data *data,
2894 struct iv_use *use, struct iv_cand *cand)
2896 bitmap depends_on;
2897 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2899 set_use_iv_cost (data, use, cand, cost, depends_on);
2902 /* Computes value of induction variable IV in iteration NITER. */
2904 static tree
2905 iv_value (struct iv *iv, tree niter)
2907 tree val;
2908 tree type = TREE_TYPE (iv->base);
2910 niter = fold_convert (type, niter);
2911 val = fold (build2 (MULT_EXPR, type, iv->step, unsave_expr_now (niter)));
2913 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2916 /* Computes value of candidate CAND at position AT in iteration NITER. */
2918 static tree
2919 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2921 tree val = iv_value (cand->iv, niter);
2922 tree type = TREE_TYPE (cand->iv->base);
2924 if (stmt_after_increment (loop, cand, at))
2925 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2927 return val;
2930 /* Check whether it is possible to express the condition in USE by comparison
2931 of candidate CAND. If so, store the comparison code to COMPARE and the
2932 value compared with to BOUND. */
2934 static bool
2935 may_eliminate_iv (struct loop *loop,
2936 struct iv_use *use, struct iv_cand *cand,
2937 enum tree_code *compare, tree *bound)
2939 edge exit;
2940 struct tree_niter_desc *niter, new_niter;
2941 tree wider_type, type, base;
2943 /* For now just very primitive -- we work just for the single exit condition,
2944 and are quite conservative about the possible overflows. TODO -- both of
2945 these can be improved. */
2946 exit = single_dom_exit (loop);
2947 if (!exit)
2948 return false;
2949 if (use->stmt != last_stmt (exit->src))
2950 return false;
2952 niter = &loop_data (loop)->niter;
2953 if (!niter->niter
2954 || !integer_nonzerop (niter->assumptions)
2955 || !integer_zerop (niter->may_be_zero))
2956 return false;
2958 if (exit->flags & EDGE_TRUE_VALUE)
2959 *compare = EQ_EXPR;
2960 else
2961 *compare = NE_EXPR;
2963 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2965 /* Let us check there is not some problem with overflows, by checking that
2966 the number of iterations is unchanged. */
2967 base = cand->iv->base;
2968 type = TREE_TYPE (base);
2969 if (stmt_after_increment (loop, cand, use->stmt))
2970 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2972 new_niter.niter = NULL_TREE;
2973 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2974 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2975 &new_niter);
2976 if (!new_niter.niter
2977 || !integer_nonzerop (new_niter.assumptions)
2978 || !integer_zerop (new_niter.may_be_zero))
2979 return false;
2981 wider_type = TREE_TYPE (new_niter.niter);
2982 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2983 wider_type = TREE_TYPE (niter->niter);
2984 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2985 fold_convert (wider_type, new_niter.niter), 0))
2986 return false;
2988 return true;
2991 /* Determines cost of basing replacement of USE on CAND in a condition. */
2993 static void
2994 determine_use_iv_cost_condition (struct ivopts_data *data,
2995 struct iv_use *use, struct iv_cand *cand)
2997 tree bound;
2998 enum tree_code compare;
3000 /* Only consider real candidates. */
3001 if (!cand->iv)
3003 set_use_iv_cost (data, use, cand, INFTY, NULL);
3004 return;
3007 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3009 bitmap depends_on = NULL;
3010 unsigned cost = force_var_cost (data, bound, &depends_on);
3012 set_use_iv_cost (data, use, cand, cost, depends_on);
3013 return;
3016 /* The induction variable elimination failed; just express the original
3017 giv. If it is compared with an invariant, note that we cannot get
3018 rid of it. */
3019 if (TREE_CODE (*use->op_p) == SSA_NAME)
3020 record_invariant (data, *use->op_p, true);
3021 else
3023 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3024 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3027 determine_use_iv_cost_generic (data, use, cand);
3030 /* Checks whether it is possible to replace the final value of USE by
3031 a direct computation. If so, the formula is stored to *VALUE. */
3033 static bool
3034 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3036 edge exit;
3037 struct tree_niter_desc *niter;
3039 exit = single_dom_exit (loop);
3040 if (!exit)
3041 return false;
3043 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3044 bb_for_stmt (use->stmt)));
3046 niter = &loop_data (loop)->niter;
3047 if (!niter->niter
3048 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3049 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3050 return false;
3052 *value = iv_value (use->iv, niter->niter);
3054 return true;
3057 /* Determines cost of replacing final value of USE using CAND. */
3059 static void
3060 determine_use_iv_cost_outer (struct ivopts_data *data,
3061 struct iv_use *use, struct iv_cand *cand)
3063 bitmap depends_on;
3064 unsigned cost;
3065 edge exit;
3066 tree value;
3067 struct loop *loop = data->current_loop;
3069 if (!cand->iv)
3071 if (!may_replace_final_value (loop, use, &value))
3073 set_use_iv_cost (data, use, cand, INFTY, NULL);
3074 return;
3077 depends_on = NULL;
3078 cost = force_var_cost (data, value, &depends_on);
3080 cost /= AVG_LOOP_NITER (loop);
3082 set_use_iv_cost (data, use, cand, cost, depends_on);
3083 return;
3086 exit = single_dom_exit (loop);
3087 if (exit)
3089 /* If there is just a single exit, we may use value of the candidate
3090 after we take it to determine the value of use. */
3091 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3092 last_stmt (exit->src));
3093 if (cost != INFTY)
3094 cost /= AVG_LOOP_NITER (loop);
3096 else
3098 /* Otherwise we just need to compute the iv. */
3099 cost = get_computation_cost (data, use, cand, false, &depends_on);
3102 set_use_iv_cost (data, use, cand, cost, depends_on);
3105 /* Determines cost of basing replacement of USE on CAND. */
3107 static void
3108 determine_use_iv_cost (struct ivopts_data *data,
3109 struct iv_use *use, struct iv_cand *cand)
3111 switch (use->type)
3113 case USE_NONLINEAR_EXPR:
3114 determine_use_iv_cost_generic (data, use, cand);
3115 break;
3117 case USE_OUTER:
3118 determine_use_iv_cost_outer (data, use, cand);
3119 break;
3121 case USE_ADDRESS:
3122 determine_use_iv_cost_address (data, use, cand);
3123 break;
3125 case USE_COMPARE:
3126 determine_use_iv_cost_condition (data, use, cand);
3127 break;
3129 default:
3130 gcc_unreachable ();
3134 /* Determines costs of basing the use of the iv on an iv candidate. */
3136 static void
3137 determine_use_iv_costs (struct ivopts_data *data)
3139 unsigned i, j;
3140 struct iv_use *use;
3141 struct iv_cand *cand;
3143 data->consider_all_candidates = (n_iv_cands (data)
3144 <= CONSIDER_ALL_CANDIDATES_BOUND);
3146 alloc_use_cost_map (data);
3148 if (!data->consider_all_candidates)
3150 /* Add the important candidate entries. */
3151 for (j = 0; j < n_iv_cands (data); j++)
3153 cand = iv_cand (data, j);
3154 if (!cand->important)
3155 continue;
3156 for (i = 0; i < n_iv_uses (data); i++)
3158 use = iv_use (data, i);
3159 determine_use_iv_cost (data, use, cand);
3164 for (i = 0; i < n_iv_uses (data); i++)
3166 use = iv_use (data, i);
3168 if (data->consider_all_candidates)
3170 for (j = 0; j < n_iv_cands (data); j++)
3172 cand = iv_cand (data, j);
3173 determine_use_iv_cost (data, use, cand);
3176 else
3178 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3180 cand = iv_cand (data, j);
3181 if (!cand->important)
3182 determine_use_iv_cost (data, use, cand);
3187 if (dump_file && (dump_flags & TDF_DETAILS))
3189 fprintf (dump_file, "Use-candidate costs:\n");
3191 for (i = 0; i < n_iv_uses (data); i++)
3193 use = iv_use (data, i);
3195 fprintf (dump_file, "Use %d:\n", i);
3196 fprintf (dump_file, " cand\tcost\tdepends on\n");
3197 for (j = 0; j < use->n_map_members; j++)
3199 if (!use->cost_map[j].cand
3200 || use->cost_map[j].cost == INFTY)
3201 continue;
3203 fprintf (dump_file, " %d\t%d\t",
3204 use->cost_map[j].cand->id,
3205 use->cost_map[j].cost);
3206 if (use->cost_map[j].depends_on)
3207 bitmap_print (dump_file,
3208 use->cost_map[j].depends_on, "","");
3209 fprintf (dump_file, "\n");
3212 fprintf (dump_file, "\n");
3214 fprintf (dump_file, "\n");
3218 /* Determines cost of the candidate CAND. */
3220 static void
3221 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3223 unsigned cost_base, cost_step;
3224 tree base, last;
3225 basic_block bb;
3227 if (!cand->iv)
3229 cand->cost = 0;
3230 return;
3233 /* There are two costs associated with the candidate -- its increment
3234 and its initialization. The second is almost negligible for any loop
3235 that rolls enough, so we take it just very little into account. */
3237 base = cand->iv->base;
3238 cost_base = force_var_cost (data, base, NULL);
3239 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3241 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3243 /* Prefer the original iv unless we may gain something by replacing it. */
3244 if (cand->pos == IP_ORIGINAL)
3245 cand->cost--;
3247 /* Prefer not to insert statements into latch unless there are some
3248 already (so that we do not create unnecessary jumps). */
3249 if (cand->pos == IP_END)
3251 bb = ip_end_pos (data->current_loop);
3252 last = last_stmt (bb);
3254 if (!last
3255 || TREE_CODE (last) == LABEL_EXPR)
3256 cand->cost++;
3260 /* Determines costs of computation of the candidates. */
3262 static void
3263 determine_iv_costs (struct ivopts_data *data)
3265 unsigned i;
3267 if (dump_file && (dump_flags & TDF_DETAILS))
3269 fprintf (dump_file, "Candidate costs:\n");
3270 fprintf (dump_file, " cand\tcost\n");
3273 for (i = 0; i < n_iv_cands (data); i++)
3275 struct iv_cand *cand = iv_cand (data, i);
3277 determine_iv_cost (data, cand);
3279 if (dump_file && (dump_flags & TDF_DETAILS))
3280 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3283 if (dump_file && (dump_flags & TDF_DETAILS))
3284 fprintf (dump_file, "\n");
3287 /* Calculates cost for having SIZE induction variables. */
3289 static unsigned
3290 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3292 return global_cost_for_size (size,
3293 loop_data (data->current_loop)->regs_used,
3294 n_iv_uses (data));
3297 /* For each size of the induction variable set determine the penalty. */
3299 static void
3300 determine_set_costs (struct ivopts_data *data)
3302 unsigned j, n;
3303 tree phi, op;
3304 struct loop *loop = data->current_loop;
3306 /* We use the following model (definitely improvable, especially the
3307 cost function -- TODO):
3309 We estimate the number of registers available (using MD data), name it A.
3311 We estimate the number of registers used by the loop, name it U. This
3312 number is obtained as the number of loop phi nodes (not counting virtual
3313 registers and bivs) + the number of variables from outside of the loop.
3315 We set a reserve R (free regs that are used for temporary computations,
3316 etc.). For now the reserve is a constant 3.
3318 Let I be the number of induction variables.
3320 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3321 make a lot of ivs without a reason).
3322 -- if A - R < U + I <= A, the cost is I * PRES_COST
3323 -- if U + I > A, the cost is I * PRES_COST and
3324 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3326 if (dump_file && (dump_flags & TDF_DETAILS))
3328 fprintf (dump_file, "Global costs:\n");
3329 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3330 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3331 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3332 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3335 n = 0;
3336 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3338 op = PHI_RESULT (phi);
3340 if (!is_gimple_reg (op))
3341 continue;
3343 if (get_iv (data, op))
3344 continue;
3346 n++;
3349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3351 struct version_info *info = ver_info (data, j);
3353 if (info->inv_id && info->has_nonlin_use)
3354 n++;
3357 loop_data (loop)->regs_used = n;
3358 if (dump_file && (dump_flags & TDF_DETAILS))
3359 fprintf (dump_file, " regs_used %d\n", n);
3361 if (dump_file && (dump_flags & TDF_DETAILS))
3363 fprintf (dump_file, " cost for size:\n");
3364 fprintf (dump_file, " ivs\tcost\n");
3365 for (j = 0; j <= 2 * target_avail_regs; j++)
3366 fprintf (dump_file, " %d\t%d\n", j,
3367 ivopts_global_cost_for_size (data, j));
3368 fprintf (dump_file, "\n");
3372 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3373 taken from the set SOL and they may depend on invariants in the set INV.
3374 The really used candidate and invariants are noted to USED_IVS and
3375 USED_INV. */
3377 static unsigned
3378 find_best_candidate (struct ivopts_data *data,
3379 struct iv_use *use, bitmap sol, bitmap inv,
3380 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3382 unsigned c, d;
3383 unsigned best_cost = INFTY, cost;
3384 struct iv_cand *cnd = NULL, *acnd;
3385 bitmap depends_on = NULL, asol;
3387 if (data->consider_all_candidates)
3388 asol = sol;
3389 else
3391 asol = BITMAP_XMALLOC ();
3392 bitmap_a_and_b (asol, sol, use->related_cands);
3395 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3397 acnd = iv_cand (data, c);
3398 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3400 if (cost == INFTY)
3401 goto next_cand;
3402 if (cost > best_cost)
3403 goto next_cand;
3404 if (cost == best_cost)
3406 /* Prefer the cheaper iv. */
3407 if (acnd->cost >= cnd->cost)
3408 goto next_cand;
3411 if (depends_on)
3413 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3414 goto next_cand);
3415 if (used_inv)
3416 bitmap_a_or_b (used_inv, used_inv, depends_on);
3419 cnd = acnd;
3420 best_cost = cost;
3421 next_cand: ;
3424 if (cnd && used_ivs)
3425 bitmap_set_bit (used_ivs, cnd->id);
3427 if (cand)
3428 *cand = cnd;
3430 if (!data->consider_all_candidates)
3431 BITMAP_XFREE (asol);
3433 return best_cost;
3436 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3437 induction variable candidates and invariants from the sets. Only
3438 uses 0 .. MAX_USE - 1 are taken into account. */
3440 static unsigned
3441 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3442 unsigned max_use)
3444 unsigned i;
3445 unsigned cost = 0, size = 0, acost;
3446 struct iv_use *use;
3447 struct iv_cand *cand;
3448 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3450 for (i = 0; i < max_use; i++)
3452 use = iv_use (data, i);
3453 acost = find_best_candidate (data, use, sol, inv,
3454 used_ivs, used_inv, NULL);
3455 if (acost == INFTY)
3457 BITMAP_XFREE (used_ivs);
3458 BITMAP_XFREE (used_inv);
3459 return INFTY;
3461 cost += acost;
3464 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3466 cand = iv_cand (data, i);
3468 /* Do not count the pseudocandidates. */
3469 if (cand->iv)
3470 size++;
3472 cost += cand->cost;
3474 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3475 cost += ivopts_global_cost_for_size (data, size);
3477 bitmap_copy (sol, used_ivs);
3478 bitmap_copy (inv, used_inv);
3480 BITMAP_XFREE (used_ivs);
3481 BITMAP_XFREE (used_inv);
3483 return cost;
3486 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3487 induction variable candidates and invariants from the sets. */
3489 static unsigned
3490 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3492 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3495 /* Tries to extend the sets IVS and INV in the best possible way in order
3496 to express the USE. */
3498 static bool
3499 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3500 struct iv_use *use)
3502 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3503 bitmap best_ivs = BITMAP_XMALLOC ();
3504 bitmap best_inv = BITMAP_XMALLOC ();
3505 bitmap act_ivs = BITMAP_XMALLOC ();
3506 bitmap act_inv = BITMAP_XMALLOC ();
3507 unsigned i;
3508 struct cost_pair *cp;
3510 bitmap_copy (best_ivs, ivs);
3511 bitmap_copy (best_inv, inv);
3513 for (i = 0; i < use->n_map_members; i++)
3515 cp = use->cost_map + i;
3516 if (cp->cost == INFTY)
3517 continue;
3519 bitmap_copy (act_ivs, ivs);
3520 bitmap_set_bit (act_ivs, cp->cand->id);
3521 if (cp->depends_on)
3522 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3523 else
3524 bitmap_copy (act_inv, inv);
3525 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3527 if (act_cost < best_cost)
3529 best_cost = act_cost;
3530 bitmap_copy (best_ivs, act_ivs);
3531 bitmap_copy (best_inv, act_inv);
3535 bitmap_copy (ivs, best_ivs);
3536 bitmap_copy (inv, best_inv);
3538 BITMAP_XFREE (best_ivs);
3539 BITMAP_XFREE (best_inv);
3540 BITMAP_XFREE (act_ivs);
3541 BITMAP_XFREE (act_inv);
3543 return (best_cost != INFTY);
3546 /* Finds an initial set of IVS and invariants INV. We do this by simply
3547 choosing the best candidate for each use. */
3549 static unsigned
3550 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3552 unsigned i;
3554 for (i = 0; i < n_iv_uses (data); i++)
3555 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3556 return INFTY;
3558 return set_cost (data, ivs, inv);
3561 /* Tries to improve set of induction variables IVS and invariants INV to get
3562 it better than COST. */
3564 static bool
3565 try_improve_iv_set (struct ivopts_data *data,
3566 bitmap ivs, bitmap inv, unsigned *cost)
3568 unsigned i, acost;
3569 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3570 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3572 /* Try altering the set of induction variables by one. */
3573 for (i = 0; i < n_iv_cands (data); i++)
3575 bitmap_copy (new_ivs, ivs);
3576 bitmap_copy (new_inv, inv);
3578 if (bitmap_bit_p (ivs, i))
3579 bitmap_clear_bit (new_ivs, i);
3580 else
3581 bitmap_set_bit (new_ivs, i);
3583 acost = set_cost (data, new_ivs, new_inv);
3584 if (acost >= *cost)
3585 continue;
3587 if (!best_new_ivs)
3589 best_new_ivs = BITMAP_XMALLOC ();
3590 best_new_inv = BITMAP_XMALLOC ();
3593 *cost = acost;
3594 bitmap_copy (best_new_ivs, new_ivs);
3595 bitmap_copy (best_new_inv, new_inv);
3598 /* Ditto for invariants. */
3599 for (i = 1; i <= data->max_inv_id; i++)
3601 if (ver_info (data, i)->has_nonlin_use)
3602 continue;
3604 bitmap_copy (new_ivs, ivs);
3605 bitmap_copy (new_inv, inv);
3607 if (bitmap_bit_p (inv, i))
3608 bitmap_clear_bit (new_inv, i);
3609 else
3610 bitmap_set_bit (new_inv, i);
3612 acost = set_cost (data, new_ivs, new_inv);
3613 if (acost >= *cost)
3614 continue;
3616 if (!best_new_ivs)
3618 best_new_ivs = BITMAP_XMALLOC ();
3619 best_new_inv = BITMAP_XMALLOC ();
3622 *cost = acost;
3623 bitmap_copy (best_new_ivs, new_ivs);
3624 bitmap_copy (best_new_inv, new_inv);
3627 BITMAP_XFREE (new_ivs);
3628 BITMAP_XFREE (new_inv);
3630 if (!best_new_ivs)
3631 return false;
3633 bitmap_copy (ivs, best_new_ivs);
3634 bitmap_copy (inv, best_new_inv);
3635 BITMAP_XFREE (best_new_ivs);
3636 BITMAP_XFREE (best_new_inv);
3637 return true;
3640 /* Attempts to find the optimal set of induction variables. We do simple
3641 greedy heuristic -- we try to replace at most one candidate in the selected
3642 solution and remove the unused ivs while this improves the cost. */
3644 static bitmap
3645 find_optimal_iv_set (struct ivopts_data *data)
3647 unsigned cost, i;
3648 bitmap set = BITMAP_XMALLOC ();
3649 bitmap inv = BITMAP_XMALLOC ();
3650 struct iv_use *use;
3652 /* Set the upper bound. */
3653 cost = get_initial_solution (data, set, inv);
3654 if (cost == INFTY)
3656 if (dump_file && (dump_flags & TDF_DETAILS))
3657 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3658 BITMAP_XFREE (inv);
3659 BITMAP_XFREE (set);
3660 return NULL;
3663 if (dump_file && (dump_flags & TDF_DETAILS))
3665 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3666 bitmap_print (dump_file, set, "", "");
3667 fprintf (dump_file, " invariants ");
3668 bitmap_print (dump_file, inv, "", "");
3669 fprintf (dump_file, "\n");
3672 while (try_improve_iv_set (data, set, inv, &cost))
3674 if (dump_file && (dump_flags & TDF_DETAILS))
3676 fprintf (dump_file, "Improved to (cost %d): ", cost);
3677 bitmap_print (dump_file, set, "", "");
3678 fprintf (dump_file, " invariants ");
3679 bitmap_print (dump_file, inv, "", "");
3680 fprintf (dump_file, "\n");
3684 if (dump_file && (dump_flags & TDF_DETAILS))
3685 fprintf (dump_file, "Final cost %d\n\n", cost);
3687 for (i = 0; i < n_iv_uses (data); i++)
3689 use = iv_use (data, i);
3690 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3693 BITMAP_XFREE (inv);
3695 return set;
3698 /* Creates a new induction variable corresponding to CAND. */
3700 static void
3701 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3703 block_stmt_iterator incr_pos;
3704 tree base;
3705 bool after = false;
3707 if (!cand->iv)
3708 return;
3710 switch (cand->pos)
3712 case IP_NORMAL:
3713 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3714 break;
3716 case IP_END:
3717 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3718 after = true;
3719 break;
3721 case IP_ORIGINAL:
3722 /* Mark that the iv is preserved. */
3723 name_info (data, cand->var_before)->preserve_biv = true;
3724 name_info (data, cand->var_after)->preserve_biv = true;
3726 /* Rewrite the increment so that it uses var_before directly. */
3727 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3729 return;
3732 gimple_add_tmp_var (cand->var_before);
3733 add_referenced_tmp_var (cand->var_before);
3735 base = unshare_expr (cand->iv->base);
3737 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3738 &incr_pos, after, &cand->var_before, &cand->var_after);
3741 /* Creates new induction variables described in SET. */
3743 static void
3744 create_new_ivs (struct ivopts_data *data, bitmap set)
3746 unsigned i;
3747 struct iv_cand *cand;
3749 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3751 cand = iv_cand (data, i);
3752 create_new_iv (data, cand);
3756 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3757 is true, remove also the ssa name defined by the statement. */
3759 static void
3760 remove_statement (tree stmt, bool including_defined_name)
3762 if (TREE_CODE (stmt) == PHI_NODE)
3764 if (!including_defined_name)
3766 /* Prevent the ssa name defined by the statement from being removed. */
3767 SET_PHI_RESULT (stmt, NULL);
3769 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3771 else
3773 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3775 bsi_remove (&bsi);
3779 /* Rewrites USE (definition of iv used in a nonlinear expression)
3780 using candidate CAND. */
3782 static void
3783 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3784 struct iv_use *use, struct iv_cand *cand)
3786 tree comp = unshare_expr (get_computation (data->current_loop,
3787 use, cand));
3788 tree op, stmts, tgt, ass;
3789 block_stmt_iterator bsi, pbsi;
3791 switch (TREE_CODE (use->stmt))
3793 case PHI_NODE:
3794 tgt = PHI_RESULT (use->stmt);
3796 /* If we should keep the biv, do not replace it. */
3797 if (name_info (data, tgt)->preserve_biv)
3798 return;
3800 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3801 while (!bsi_end_p (pbsi)
3802 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3804 bsi = pbsi;
3805 bsi_next (&pbsi);
3807 break;
3809 case MODIFY_EXPR:
3810 tgt = TREE_OPERAND (use->stmt, 0);
3811 bsi = stmt_for_bsi (use->stmt);
3812 break;
3814 default:
3815 gcc_unreachable ();
3818 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3820 if (TREE_CODE (use->stmt) == PHI_NODE)
3822 if (stmts)
3823 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3824 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3825 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3826 remove_statement (use->stmt, false);
3827 SSA_NAME_DEF_STMT (tgt) = ass;
3829 else
3831 if (stmts)
3832 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3833 TREE_OPERAND (use->stmt, 1) = op;
3837 /* Replaces ssa name in index IDX by its basic variable. Callback for
3838 for_each_index. */
3840 static bool
3841 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3842 void *data ATTRIBUTE_UNUSED)
3844 if (TREE_CODE (*idx) == SSA_NAME)
3845 *idx = SSA_NAME_VAR (*idx);
3846 return true;
3849 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3851 static tree
3852 unshare_and_remove_ssa_names (tree ref)
3854 ref = unshare_expr (ref);
3855 for_each_index (&ref, idx_remove_ssa_names, NULL);
3857 return ref;
3860 /* Rewrites base of memory access OP with expression WITH in statement
3861 pointed to by BSI. */
3863 static void
3864 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3866 tree var = get_base_address (*op), new_var, new_name, copy, name;
3867 tree orig;
3869 if (!var || TREE_CODE (with) != SSA_NAME)
3870 goto do_rewrite;
3872 if (TREE_CODE (var) == INDIRECT_REF)
3873 var = TREE_OPERAND (var, 0);
3874 if (TREE_CODE (var) == SSA_NAME)
3876 name = var;
3877 var = SSA_NAME_VAR (var);
3879 else if (DECL_P (var))
3880 name = NULL_TREE;
3881 else
3882 goto do_rewrite;
3884 if (var_ann (var)->type_mem_tag)
3885 var = var_ann (var)->type_mem_tag;
3887 /* We need to add a memory tag for the variable. But we do not want
3888 to add it to the temporary used for the computations, since this leads
3889 to problems in redundancy elimination when there are common parts
3890 in two computations referring to the different arrays. So we copy
3891 the variable to a new temporary. */
3892 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3893 if (name)
3894 new_name = duplicate_ssa_name (name, copy);
3895 else
3897 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3898 add_referenced_tmp_var (new_var);
3899 var_ann (new_var)->type_mem_tag = var;
3900 new_name = make_ssa_name (new_var, copy);
3902 TREE_OPERAND (copy, 0) = new_name;
3903 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3904 with = new_name;
3906 do_rewrite:
3908 orig = NULL_TREE;
3909 if (TREE_CODE (*op) == INDIRECT_REF)
3910 orig = REF_ORIGINAL (*op);
3911 if (!orig)
3912 orig = unshare_and_remove_ssa_names (*op);
3914 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3915 /* Record the original reference, for purposes of alias analysis. */
3916 REF_ORIGINAL (*op) = orig;
3919 /* Rewrites USE (address that is an iv) using candidate CAND. */
3921 static void
3922 rewrite_use_address (struct ivopts_data *data,
3923 struct iv_use *use, struct iv_cand *cand)
3925 tree comp = unshare_expr (get_computation (data->current_loop,
3926 use, cand));
3927 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3928 tree stmts;
3929 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3931 if (stmts)
3932 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3934 rewrite_address_base (&bsi, use->op_p, op);
3937 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3938 candidate CAND. */
3940 static void
3941 rewrite_use_compare (struct ivopts_data *data,
3942 struct iv_use *use, struct iv_cand *cand)
3944 tree comp;
3945 tree *op_p, cond, op, stmts, bound;
3946 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3947 enum tree_code compare;
3949 if (may_eliminate_iv (data->current_loop,
3950 use, cand, &compare, &bound))
3952 op = force_gimple_operand (unshare_expr (bound), &stmts,
3953 true, NULL_TREE);
3955 if (stmts)
3956 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3958 *use->op_p = build2 (compare, boolean_type_node,
3959 var_at_stmt (data->current_loop,
3960 cand, use->stmt), op);
3961 modify_stmt (use->stmt);
3962 return;
3965 /* The induction variable elimination failed; just express the original
3966 giv. */
3967 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3969 cond = *use->op_p;
3970 op_p = &TREE_OPERAND (cond, 0);
3971 if (TREE_CODE (*op_p) != SSA_NAME
3972 || zero_p (get_iv (data, *op_p)->step))
3973 op_p = &TREE_OPERAND (cond, 1);
3975 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3976 if (stmts)
3977 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3979 *op_p = op;
3982 /* Ensure that operand *OP_P may be used at the end of EXIT without
3983 violating loop closed ssa form. */
3985 static void
3986 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3988 basic_block def_bb;
3989 struct loop *def_loop;
3990 tree phi, use;
3992 use = USE_FROM_PTR (op_p);
3993 if (TREE_CODE (use) != SSA_NAME)
3994 return;
3996 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3997 if (!def_bb)
3998 return;
4000 def_loop = def_bb->loop_father;
4001 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4002 return;
4004 /* Try finding a phi node that copies the value out of the loop. */
4005 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4006 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4007 break;
4009 if (!phi)
4011 /* Create such a phi node. */
4012 tree new_name = duplicate_ssa_name (use, NULL);
4014 phi = create_phi_node (new_name, exit->dest);
4015 SSA_NAME_DEF_STMT (new_name) = phi;
4016 add_phi_arg (&phi, use, exit);
4019 SET_USE (op_p, PHI_RESULT (phi));
4022 /* Ensure that operands of STMT may be used at the end of EXIT without
4023 violating loop closed ssa form. */
4025 static void
4026 protect_loop_closed_ssa_form (edge exit, tree stmt)
4028 use_optype uses;
4029 vuse_optype vuses;
4030 v_may_def_optype v_may_defs;
4031 unsigned i;
4033 get_stmt_operands (stmt);
4035 uses = STMT_USE_OPS (stmt);
4036 for (i = 0; i < NUM_USES (uses); i++)
4037 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4039 vuses = STMT_VUSE_OPS (stmt);
4040 for (i = 0; i < NUM_VUSES (vuses); i++)
4041 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4043 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4044 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4045 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4048 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4049 so that they are emitted on the correct place, and so that the loop closed
4050 ssa form is preserved. */
4052 static void
4053 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4055 tree_stmt_iterator tsi;
4056 block_stmt_iterator bsi;
4057 tree phi, stmt, def, next;
4059 if (EDGE_COUNT (exit->dest->preds) > 1)
4060 split_loop_exit_edge (exit);
4062 if (TREE_CODE (stmts) == STATEMENT_LIST)
4064 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4065 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4067 else
4068 protect_loop_closed_ssa_form (exit, stmts);
4070 /* Ensure there is label in exit->dest, so that we can
4071 insert after it. */
4072 tree_block_label (exit->dest);
4073 bsi = bsi_after_labels (exit->dest);
4074 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4076 if (!op)
4077 return;
4079 for (phi = phi_nodes (exit->dest); phi; phi = next)
4081 next = TREE_CHAIN (phi);
4083 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4085 def = PHI_RESULT (phi);
4086 remove_statement (phi, false);
4087 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4088 def, op);
4089 SSA_NAME_DEF_STMT (def) = stmt;
4090 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4095 /* Rewrites the final value of USE (that is only needed outside of the loop)
4096 using candidate CAND. */
4098 static void
4099 rewrite_use_outer (struct ivopts_data *data,
4100 struct iv_use *use, struct iv_cand *cand)
4102 edge exit;
4103 tree value, op, stmts, tgt;
4104 tree phi;
4106 switch (TREE_CODE (use->stmt))
4108 case PHI_NODE:
4109 tgt = PHI_RESULT (use->stmt);
4110 break;
4111 case MODIFY_EXPR:
4112 tgt = TREE_OPERAND (use->stmt, 0);
4113 break;
4114 default:
4115 gcc_unreachable ();
4118 exit = single_dom_exit (data->current_loop);
4120 if (exit)
4122 if (!cand->iv)
4124 bool ok = may_replace_final_value (data->current_loop, use, &value);
4125 gcc_assert (ok);
4127 else
4128 value = get_computation_at (data->current_loop,
4129 use, cand, last_stmt (exit->src));
4131 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4133 /* If we will preserve the iv anyway and we would need to perform
4134 some computation to replace the final value, do nothing. */
4135 if (stmts && name_info (data, tgt)->preserve_biv)
4136 return;
4138 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4140 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4142 if (USE_FROM_PTR (use_p) == tgt)
4143 SET_USE (use_p, op);
4146 if (stmts)
4147 compute_phi_arg_on_exit (exit, stmts, op);
4149 /* Enable removal of the statement. We cannot remove it directly,
4150 since we may still need the aliasing information attached to the
4151 ssa name defined by it. */
4152 name_info (data, tgt)->iv->have_use_for = false;
4153 return;
4156 /* If the variable is going to be preserved anyway, there is nothing to
4157 do. */
4158 if (name_info (data, tgt)->preserve_biv)
4159 return;
4161 /* Otherwise we just need to compute the iv. */
4162 rewrite_use_nonlinear_expr (data, use, cand);
4165 /* Rewrites USE using candidate CAND. */
4167 static void
4168 rewrite_use (struct ivopts_data *data,
4169 struct iv_use *use, struct iv_cand *cand)
4171 switch (use->type)
4173 case USE_NONLINEAR_EXPR:
4174 rewrite_use_nonlinear_expr (data, use, cand);
4175 break;
4177 case USE_OUTER:
4178 rewrite_use_outer (data, use, cand);
4179 break;
4181 case USE_ADDRESS:
4182 rewrite_use_address (data, use, cand);
4183 break;
4185 case USE_COMPARE:
4186 rewrite_use_compare (data, use, cand);
4187 break;
4189 default:
4190 gcc_unreachable ();
4192 modify_stmt (use->stmt);
4195 /* Rewrite the uses using the selected induction variables. */
4197 static void
4198 rewrite_uses (struct ivopts_data *data)
4200 unsigned i;
4201 struct iv_cand *cand;
4202 struct iv_use *use;
4204 for (i = 0; i < n_iv_uses (data); i++)
4206 use = iv_use (data, i);
4207 cand = use->selected;
4208 gcc_assert (cand);
4210 rewrite_use (data, use, cand);
4214 /* Removes the ivs that are not used after rewriting. */
4216 static void
4217 remove_unused_ivs (struct ivopts_data *data)
4219 unsigned j;
4221 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4223 struct version_info *info;
4225 info = ver_info (data, j);
4226 if (info->iv
4227 && !zero_p (info->iv->step)
4228 && !info->inv_id
4229 && !info->iv->have_use_for
4230 && !info->preserve_biv)
4231 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4235 /* Frees data allocated by the optimization of a single loop. */
4237 static void
4238 free_loop_data (struct ivopts_data *data)
4240 unsigned i, j;
4242 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4244 struct version_info *info;
4246 info = ver_info (data, i);
4247 if (info->iv)
4248 free (info->iv);
4249 info->iv = NULL;
4250 info->has_nonlin_use = false;
4251 info->preserve_biv = false;
4252 info->inv_id = 0;
4254 bitmap_clear (data->relevant);
4256 for (i = 0; i < n_iv_uses (data); i++)
4258 struct iv_use *use = iv_use (data, i);
4260 free (use->iv);
4261 BITMAP_XFREE (use->related_cands);
4262 for (j = 0; j < use->n_map_members; j++)
4263 if (use->cost_map[j].depends_on)
4264 BITMAP_XFREE (use->cost_map[j].depends_on);
4265 free (use->cost_map);
4266 free (use);
4268 VARRAY_POP_ALL (data->iv_uses);
4270 for (i = 0; i < n_iv_cands (data); i++)
4272 struct iv_cand *cand = iv_cand (data, i);
4274 if (cand->iv)
4275 free (cand->iv);
4276 free (cand);
4278 VARRAY_POP_ALL (data->iv_candidates);
4280 if (data->version_info_size < num_ssa_names)
4282 data->version_info_size = 2 * num_ssa_names;
4283 free (data->version_info);
4284 data->version_info = xcalloc (data->version_info_size,
4285 sizeof (struct version_info));
4288 data->max_inv_id = 0;
4290 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4292 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4294 SET_DECL_RTL (obj, NULL_RTX);
4296 VARRAY_POP_ALL (decl_rtl_to_reset);
4299 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4300 loop tree. */
4302 static void
4303 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4305 unsigned i;
4307 for (i = 1; i < loops->num; i++)
4308 if (loops->parray[i])
4310 free (loops->parray[i]->aux);
4311 loops->parray[i]->aux = NULL;
4314 free_loop_data (data);
4315 free (data->version_info);
4316 BITMAP_XFREE (data->relevant);
4318 VARRAY_FREE (decl_rtl_to_reset);
4319 VARRAY_FREE (data->iv_uses);
4320 VARRAY_FREE (data->iv_candidates);
4323 /* Optimizes the LOOP. Returns true if anything changed. */
4325 static bool
4326 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4328 bool changed = false;
4329 bitmap iv_set;
4330 edge exit;
4332 data->current_loop = loop;
4334 if (dump_file && (dump_flags & TDF_DETAILS))
4336 fprintf (dump_file, "Processing loop %d\n", loop->num);
4338 exit = single_dom_exit (loop);
4339 if (exit)
4341 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4342 exit->src->index, exit->dest->index);
4343 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4344 fprintf (dump_file, "\n");
4347 fprintf (dump_file, "\n");
4350 /* For each ssa name determines whether it behaves as an induction variable
4351 in some loop. */
4352 if (!find_induction_variables (data))
4353 goto finish;
4355 /* Finds interesting uses (item 1). */
4356 find_interesting_uses (data);
4357 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4358 goto finish;
4360 /* Finds candidates for the induction variables (item 2). */
4361 find_iv_candidates (data);
4363 /* Calculates the costs (item 3, part 1). */
4364 determine_use_iv_costs (data);
4365 determine_iv_costs (data);
4366 determine_set_costs (data);
4368 /* Find the optimal set of induction variables (item 3, part 2). */
4369 iv_set = find_optimal_iv_set (data);
4370 if (!iv_set)
4371 goto finish;
4372 changed = true;
4374 /* Create the new induction variables (item 4, part 1). */
4375 create_new_ivs (data, iv_set);
4377 /* Rewrite the uses (item 4, part 2). */
4378 rewrite_uses (data);
4380 /* Remove the ivs that are unused after rewriting. */
4381 remove_unused_ivs (data);
4383 loop_commit_inserts ();
4385 BITMAP_XFREE (iv_set);
4387 /* We have changed the structure of induction variables; it might happen
4388 that definitions in the scev database refer to some of them that were
4389 eliminated. */
4390 scev_reset ();
4392 finish:
4393 free_loop_data (data);
4395 return changed;
4398 /* Main entry point. Optimizes induction variables in LOOPS. */
4400 void
4401 tree_ssa_iv_optimize (struct loops *loops)
4403 struct loop *loop;
4404 struct ivopts_data data;
4406 tree_ssa_iv_optimize_init (loops, &data);
4408 /* Optimize the loops starting with the innermost ones. */
4409 loop = loops->tree_root;
4410 while (loop->inner)
4411 loop = loop->inner;
4413 #ifdef ENABLE_CHECKING
4414 verify_loop_closed_ssa ();
4415 verify_stmts ();
4416 #endif
4418 /* Scan the loops, inner ones first. */
4419 while (loop != loops->tree_root)
4421 if (dump_file && (dump_flags & TDF_DETAILS))
4422 flow_loop_dump (loop, dump_file, NULL, 1);
4423 if (tree_ssa_iv_optimize_loop (&data, loop))
4425 #ifdef ENABLE_CHECKING
4426 verify_loop_closed_ssa ();
4427 verify_stmts ();
4428 #endif
4431 if (loop->next)
4433 loop = loop->next;
4434 while (loop->inner)
4435 loop = loop->inner;
4437 else
4438 loop = loop->outer;
4441 tree_ssa_iv_optimize_finalize (loops, &data);