1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base
; /* Initial value of the iv. */
107 tree base_object
; /* A memory object to that the induction variable points. */
108 tree step
; /* Step of the iv (constant only). */
109 tree ssa_name
; /* The ssa name with the value. */
110 bool biv_p
; /* Is it a biv? */
111 bool have_use_for
; /* Do we already have a use for it? */
112 unsigned use_id
; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
118 tree name
; /* The ssa name. */
119 struct iv
*iv
; /* Induction variable description. */
120 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id
; /* Id of an invariant. */
123 bool preserve_biv
; /* For the original biv, whether to preserve it. */
126 /* Information attached to loop. */
129 struct tree_niter_desc niter
;
130 /* Number of iterations. */
132 unsigned regs_used
; /* Number of registers used. */
138 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
139 USE_OUTER
, /* The induction variable is used outside the loop. */
140 USE_ADDRESS
, /* Use in an address. */
141 USE_COMPARE
/* Use is a compare. */
144 /* The candidate - cost pair. */
147 struct iv_cand
*cand
; /* The candidate. */
148 unsigned cost
; /* The cost. */
149 bitmap depends_on
; /* The list of invariants that have to be
156 unsigned id
; /* The id of the use. */
157 enum use_type type
; /* Type of the use. */
158 struct iv
*iv
; /* The induction variable it is based on. */
159 tree stmt
; /* Statement in that it occurs. */
160 tree
*op_p
; /* The place where it occurs. */
161 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
164 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
165 struct cost_pair
*cost_map
;
166 /* The costs wrto the iv candidates. */
168 struct iv_cand
*selected
;
169 /* The selected candidate. */
172 /* The position where the iv is computed. */
175 IP_NORMAL
, /* At the end, just before the exit condition. */
176 IP_END
, /* At the end of the latch block. */
177 IP_ORIGINAL
/* The original biv. */
180 /* The induction variable candidate. */
183 unsigned id
; /* The number of the candidate. */
184 bool important
; /* Whether this is an "important" candidate, i.e. such
185 that it should be considered by all uses. */
186 enum iv_position pos
; /* Where it is computed. */
187 tree incremented_at
; /* For original biv, the statement where it is
189 tree var_before
; /* The variable used for it before increment. */
190 tree var_after
; /* The variable used for it after increment. */
191 struct iv
*iv
; /* The value of the candidate. NULL for
192 "pseudocandidate" used to indicate the possibility
193 to replace the final value of an iv by direct
194 computation of the value. */
195 unsigned cost
; /* Cost of the candidate. */
198 /* The data used by the induction variable optimizations. */
202 /* The currently optimized loop. */
203 struct loop
*current_loop
;
205 /* The size of version_info array allocated. */
206 unsigned version_info_size
;
208 /* The array of information for the ssa names. */
209 struct version_info
*version_info
;
211 /* The bitmap of indices in version_info whose value was changed. */
214 /* The maximum invariant id. */
217 /* The uses of induction variables. */
220 /* The candidates. */
221 varray_type iv_candidates
;
223 /* A bitmap of important candidates. */
224 bitmap important_candidates
;
226 /* Whether to consider just related and important candidates when replacing a
228 bool consider_all_candidates
;
231 /* An assignment of iv candidates to uses. */
235 /* The number of uses covered by the assignment. */
238 /* Number of uses that cannot be expressed by the candidates in the set. */
241 /* Candidate assigned to a use, together with the related costs. */
242 struct cost_pair
**cand_for_use
;
244 /* Number of times each candidate is used. */
245 unsigned *n_cand_uses
;
247 /* The candidates used. */
250 /* Total number of registers needed. */
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost
;
256 /* Total cost of candidates. */
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses
;
262 /* Total cost of the assignment. */
266 /* Difference of two iv candidate assignments. */
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair
*old_cp
;
276 /* A new assignment. */
277 struct cost_pair
*new_cp
;
279 /* Next change in the list. */
280 struct iv_ca_delta
*next_change
;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* The list of trees for that the decl_rtl field must be reset is stored
297 static varray_type decl_rtl_to_reset
;
299 /* Number of uses recorded in DATA. */
301 static inline unsigned
302 n_iv_uses (struct ivopts_data
*data
)
304 return VARRAY_ACTIVE_SIZE (data
->iv_uses
);
307 /* Ith use recorded in DATA. */
309 static inline struct iv_use
*
310 iv_use (struct ivopts_data
*data
, unsigned i
)
312 return VARRAY_GENERIC_PTR_NOGC (data
->iv_uses
, i
);
315 /* Number of candidates recorded in DATA. */
317 static inline unsigned
318 n_iv_cands (struct ivopts_data
*data
)
320 return VARRAY_ACTIVE_SIZE (data
->iv_candidates
);
323 /* Ith candidate recorded in DATA. */
325 static inline struct iv_cand
*
326 iv_cand (struct ivopts_data
*data
, unsigned i
)
328 return VARRAY_GENERIC_PTR_NOGC (data
->iv_candidates
, i
);
331 /* The data for LOOP. */
333 static inline struct loop_data
*
334 loop_data (struct loop
*loop
)
339 /* The single loop exit if it dominates the latch, NULL otherwise. */
342 single_dom_exit (struct loop
*loop
)
344 edge exit
= loop
->single_exit
;
349 if (!just_once_each_iteration_p (loop
, exit
->src
))
355 /* Dumps information about the induction variable IV to FILE. */
357 extern void dump_iv (FILE *, struct iv
*);
359 dump_iv (FILE *file
, struct iv
*iv
)
363 fprintf (file
, "ssa name ");
364 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
365 fprintf (file
, "\n");
368 fprintf (file
, " type ");
369 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
370 fprintf (file
, "\n");
374 fprintf (file
, " base ");
375 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
376 fprintf (file
, "\n");
378 fprintf (file
, " step ");
379 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
380 fprintf (file
, "\n");
384 fprintf (file
, " invariant ");
385 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
386 fprintf (file
, "\n");
391 fprintf (file
, " base object ");
392 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
393 fprintf (file
, "\n");
397 fprintf (file
, " is a biv\n");
400 /* Dumps information about the USE to FILE. */
402 extern void dump_use (FILE *, struct iv_use
*);
404 dump_use (FILE *file
, struct iv_use
*use
)
406 fprintf (file
, "use %d\n", use
->id
);
410 case USE_NONLINEAR_EXPR
:
411 fprintf (file
, " generic\n");
415 fprintf (file
, " outside\n");
419 fprintf (file
, " address\n");
423 fprintf (file
, " compare\n");
430 fprintf (file
, " in statement ");
431 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
432 fprintf (file
, "\n");
434 fprintf (file
, " at position ");
436 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
437 fprintf (file
, "\n");
439 dump_iv (file
, use
->iv
);
441 fprintf (file
, " related candidates ");
442 dump_bitmap (file
, use
->related_cands
);
445 /* Dumps information about the uses to FILE. */
447 extern void dump_uses (FILE *, struct ivopts_data
*);
449 dump_uses (FILE *file
, struct ivopts_data
*data
)
454 for (i
= 0; i
< n_iv_uses (data
); i
++)
456 use
= iv_use (data
, i
);
458 dump_use (file
, use
);
459 fprintf (file
, "\n");
463 /* Dumps information about induction variable candidate CAND to FILE. */
465 extern void dump_cand (FILE *, struct iv_cand
*);
467 dump_cand (FILE *file
, struct iv_cand
*cand
)
469 struct iv
*iv
= cand
->iv
;
471 fprintf (file
, "candidate %d%s\n",
472 cand
->id
, cand
->important
? " (important)" : "");
476 fprintf (file
, " final value replacement\n");
483 fprintf (file
, " incremented before exit test\n");
487 fprintf (file
, " incremented at end\n");
491 fprintf (file
, " original biv\n");
498 /* Returns the info for ssa version VER. */
500 static inline struct version_info
*
501 ver_info (struct ivopts_data
*data
, unsigned ver
)
503 return data
->version_info
+ ver
;
506 /* Returns the info for ssa name NAME. */
508 static inline struct version_info
*
509 name_info (struct ivopts_data
*data
, tree name
)
511 return ver_info (data
, SSA_NAME_VERSION (name
));
514 /* Checks whether there exists number X such that X * B = A, counting modulo
518 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
521 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
522 unsigned HOST_WIDE_INT inv
, ex
, val
;
528 /* First divide the whole equation by 2 as long as possible. */
529 while (!(a
& 1) && !(b
& 1))
539 /* If b is still even, a is odd and there is no such x. */
543 /* Find the inverse of b. We compute it as
544 b^(2^(bits - 1) - 1) (mod 2^bits). */
547 for (i
= 0; i
< bits
- 1; i
++)
549 inv
= (inv
* ex
) & mask
;
550 ex
= (ex
* ex
) & mask
;
553 val
= (a
* inv
) & mask
;
555 gcc_assert (((val
* b
) & mask
) == a
);
557 if ((val
>> (bits
- 1)) & 1)
565 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
569 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
571 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
575 if (sbb
== loop
->latch
)
581 return stmt
== last_stmt (bb
);
584 /* Returns true if STMT if after the place where the original induction
585 variable CAND is incremented. */
588 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
590 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
591 basic_block stmt_bb
= bb_for_stmt (stmt
);
592 block_stmt_iterator bsi
;
594 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
597 if (stmt_bb
!= cand_bb
)
600 /* Scan the block from the end, since the original ivs are usually
601 incremented at the end of the loop body. */
602 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
604 if (bsi_stmt (bsi
) == cand
->incremented_at
)
606 if (bsi_stmt (bsi
) == stmt
)
611 /* Returns true if STMT if after the place where the induction variable
612 CAND is incremented in LOOP. */
615 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
623 return stmt_after_ip_normal_pos (loop
, stmt
);
626 return stmt_after_ip_original_pos (cand
, stmt
);
633 /* Initializes data structures used by the iv optimization pass, stored
634 in DATA. LOOPS is the loop tree. */
637 tree_ssa_iv_optimize_init (struct loops
*loops
, struct ivopts_data
*data
)
641 data
->version_info_size
= 2 * num_ssa_names
;
642 data
->version_info
= xcalloc (data
->version_info_size
,
643 sizeof (struct version_info
));
644 data
->relevant
= BITMAP_XMALLOC ();
645 data
->important_candidates
= BITMAP_XMALLOC ();
646 data
->max_inv_id
= 0;
648 for (i
= 1; i
< loops
->num
; i
++)
649 if (loops
->parray
[i
])
650 loops
->parray
[i
]->aux
= xcalloc (1, sizeof (struct loop_data
));
652 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_uses
, 20, "iv_uses");
653 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_candidates
, 20, "iv_candidates");
654 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset
, 20, "decl_rtl_to_reset");
657 /* Returns a memory object to that EXPR points. In case we are able to
658 determine that it does not point to any such object, NULL is returned. */
661 determine_base_object (tree expr
)
663 enum tree_code code
= TREE_CODE (expr
);
664 tree base
, obj
, op0
, op1
;
666 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
675 obj
= TREE_OPERAND (expr
, 0);
676 base
= get_base_address (obj
);
679 return fold_convert (ptr_type_node
, expr
);
681 if (TREE_CODE (base
) == INDIRECT_REF
)
682 return fold_convert (ptr_type_node
, TREE_OPERAND (base
, 0));
684 return fold (build1 (ADDR_EXPR
, ptr_type_node
, base
));
688 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
689 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
695 return (code
== PLUS_EXPR
697 : fold (build1 (NEGATE_EXPR
, ptr_type_node
, op1
)));
699 return fold (build (code
, ptr_type_node
, op0
, op1
));
702 return fold_convert (ptr_type_node
, expr
);
706 /* Allocates an induction variable with given initial value BASE and step STEP
710 alloc_iv (tree base
, tree step
)
712 struct iv
*iv
= xcalloc (1, sizeof (struct iv
));
714 if (step
&& integer_zerop (step
))
718 iv
->base_object
= determine_base_object (base
);
721 iv
->have_use_for
= false;
723 iv
->ssa_name
= NULL_TREE
;
728 /* Sets STEP and BASE for induction variable IV. */
731 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
733 struct version_info
*info
= name_info (data
, iv
);
735 gcc_assert (!info
->iv
);
737 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
738 info
->iv
= alloc_iv (base
, step
);
739 info
->iv
->ssa_name
= iv
;
742 /* Finds induction variable declaration for VAR. */
745 get_iv (struct ivopts_data
*data
, tree var
)
749 if (!name_info (data
, var
)->iv
)
751 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
754 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
755 set_iv (data
, var
, var
, NULL_TREE
);
758 return name_info (data
, var
)->iv
;
761 /* Determines the step of a biv defined in PHI. */
764 determine_biv_step (tree phi
)
766 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
767 tree name
= PHI_RESULT (phi
), base
, step
;
768 tree type
= TREE_TYPE (name
);
770 if (!is_gimple_reg (name
))
773 if (!simple_iv (loop
, phi
, name
, &base
, &step
))
777 return build_int_cst (type
, 0);
782 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
785 abnormal_ssa_name_p (tree exp
)
790 if (TREE_CODE (exp
) != SSA_NAME
)
793 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
796 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
797 abnormal phi node. Callback for for_each_index. */
800 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
801 void *data ATTRIBUTE_UNUSED
)
803 if (TREE_CODE (base
) == ARRAY_REF
)
805 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
807 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
811 return !abnormal_ssa_name_p (*index
);
814 /* Returns true if EXPR contains a ssa name that occurs in an
815 abnormal phi node. */
818 contains_abnormal_ssa_name_p (tree expr
)
820 enum tree_code code
= TREE_CODE (expr
);
821 enum tree_code_class
class = TREE_CODE_CLASS (code
);
823 if (code
== SSA_NAME
)
824 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
826 if (code
== INTEGER_CST
827 || is_gimple_min_invariant (expr
))
830 if (code
== ADDR_EXPR
)
831 return !for_each_index (&TREE_OPERAND (expr
, 1),
832 idx_contains_abnormal_ssa_name_p
,
839 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
844 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
856 /* Finds basic ivs. */
859 find_bivs (struct ivopts_data
*data
)
861 tree phi
, step
, type
, base
;
863 struct loop
*loop
= data
->current_loop
;
865 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
867 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
870 step
= determine_biv_step (phi
);
874 if (cst_and_fits_in_hwi (step
)
875 && int_cst_value (step
) == 0)
878 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
879 if (contains_abnormal_ssa_name_p (base
))
882 type
= TREE_TYPE (PHI_RESULT (phi
));
883 base
= fold_convert (type
, base
);
884 step
= fold_convert (type
, step
);
886 /* FIXME: We do not handle induction variables whose step does
887 not satisfy cst_and_fits_in_hwi. */
888 if (!cst_and_fits_in_hwi (step
))
891 set_iv (data
, PHI_RESULT (phi
), base
, step
);
898 /* Marks basic ivs. */
901 mark_bivs (struct ivopts_data
*data
)
904 struct iv
*iv
, *incr_iv
;
905 struct loop
*loop
= data
->current_loop
;
908 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
910 iv
= get_iv (data
, PHI_RESULT (phi
));
914 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
915 incr_iv
= get_iv (data
, var
);
919 /* If the increment is in the subloop, ignore it. */
920 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
921 if (incr_bb
->loop_father
!= data
->current_loop
922 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
926 incr_iv
->biv_p
= true;
930 /* Checks whether STMT defines a linear induction variable and stores its
931 parameters to BASE and STEP. */
934 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
,
935 tree
*base
, tree
*step
)
938 struct loop
*loop
= data
->current_loop
;
943 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
946 lhs
= TREE_OPERAND (stmt
, 0);
947 if (TREE_CODE (lhs
) != SSA_NAME
)
950 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), base
, step
))
953 /* FIXME: We do not handle induction variables whose step does
954 not satisfy cst_and_fits_in_hwi. */
956 && !cst_and_fits_in_hwi (*step
))
959 if (contains_abnormal_ssa_name_p (*base
))
965 /* Finds general ivs in statement STMT. */
968 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
972 if (!find_givs_in_stmt_scev (data
, stmt
, &base
, &step
))
975 set_iv (data
, TREE_OPERAND (stmt
, 0), base
, step
);
978 /* Finds general ivs in basic block BB. */
981 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
983 block_stmt_iterator bsi
;
985 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
986 find_givs_in_stmt (data
, bsi_stmt (bsi
));
989 /* Finds general ivs. */
992 find_givs (struct ivopts_data
*data
)
994 struct loop
*loop
= data
->current_loop
;
995 basic_block
*body
= get_loop_body_in_dom_order (loop
);
998 for (i
= 0; i
< loop
->num_nodes
; i
++)
999 find_givs_in_bb (data
, body
[i
]);
1003 /* Determine the number of iterations of the current loop. */
1006 determine_number_of_iterations (struct ivopts_data
*data
)
1008 struct loop
*loop
= data
->current_loop
;
1009 edge exit
= single_dom_exit (loop
);
1014 number_of_iterations_exit (loop
, exit
, &loop_data (loop
)->niter
);
1017 /* For each ssa name defined in LOOP determines whether it is an induction
1018 variable and if so, its initial value and step. */
1021 find_induction_variables (struct ivopts_data
*data
)
1024 struct loop
*loop
= data
->current_loop
;
1027 if (!find_bivs (data
))
1032 determine_number_of_iterations (data
);
1034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1036 if (loop_data (loop
)->niter
.niter
)
1038 fprintf (dump_file
, " number of iterations ");
1039 print_generic_expr (dump_file
, loop_data (loop
)->niter
.niter
,
1041 fprintf (dump_file
, "\n");
1043 fprintf (dump_file
, " may be zero if ");
1044 print_generic_expr (dump_file
, loop_data (loop
)->niter
.may_be_zero
,
1046 fprintf (dump_file
, "\n");
1048 fprintf (dump_file
, " bogus unless ");
1049 print_generic_expr (dump_file
, loop_data (loop
)->niter
.assumptions
,
1051 fprintf (dump_file
, "\n");
1052 fprintf (dump_file
, "\n");
1055 fprintf (dump_file
, "Induction variables:\n\n");
1057 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1059 if (ver_info (data
, i
)->iv
)
1060 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1067 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1069 static struct iv_use
*
1070 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1071 tree stmt
, enum use_type use_type
)
1073 struct iv_use
*use
= xcalloc (1, sizeof (struct iv_use
));
1075 use
->id
= n_iv_uses (data
);
1076 use
->type
= use_type
;
1080 use
->related_cands
= BITMAP_XMALLOC ();
1082 /* To avoid showing ssa name in the dumps, if it was not reset by the
1084 iv
->ssa_name
= NULL_TREE
;
1086 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1087 dump_use (dump_file
, use
);
1089 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_uses
, use
);
1094 /* Checks whether OP is a loop-level invariant and if so, records it.
1095 NONLINEAR_USE is true if the invariant is used in a way we do not
1096 handle specially. */
1099 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1102 struct version_info
*info
;
1104 if (TREE_CODE (op
) != SSA_NAME
1105 || !is_gimple_reg (op
))
1108 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1110 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1113 info
= name_info (data
, op
);
1115 info
->has_nonlin_use
|= nonlinear_use
;
1117 info
->inv_id
= ++data
->max_inv_id
;
1118 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1121 /* Checks whether the use OP is interesting and if so, records it
1124 static struct iv_use
*
1125 find_interesting_uses_outer_or_nonlin (struct ivopts_data
*data
, tree op
,
1133 if (TREE_CODE (op
) != SSA_NAME
)
1136 iv
= get_iv (data
, op
);
1140 if (iv
->have_use_for
)
1142 use
= iv_use (data
, iv
->use_id
);
1144 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
1145 || use
->type
== USE_OUTER
);
1147 if (type
== USE_NONLINEAR_EXPR
)
1148 use
->type
= USE_NONLINEAR_EXPR
;
1152 if (zero_p (iv
->step
))
1154 record_invariant (data
, op
, true);
1157 iv
->have_use_for
= true;
1159 civ
= xmalloc (sizeof (struct iv
));
1162 stmt
= SSA_NAME_DEF_STMT (op
);
1163 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1164 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1166 use
= record_use (data
, NULL
, civ
, stmt
, type
);
1167 iv
->use_id
= use
->id
;
1172 /* Checks whether the use OP is interesting and if so, records it. */
1174 static struct iv_use
*
1175 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1177 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_NONLINEAR_EXPR
);
1180 /* Records a definition of induction variable OP that is used outside of the
1183 static struct iv_use
*
1184 find_interesting_uses_outer (struct ivopts_data
*data
, tree op
)
1186 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_OUTER
);
1189 /* Checks whether the condition *COND_P in STMT is interesting
1190 and if so, records it. */
1193 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1197 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1199 tree zero
= integer_zero_node
;
1201 const_iv
.step
= NULL_TREE
;
1203 if (integer_zerop (*cond_p
)
1204 || integer_nonzerop (*cond_p
))
1207 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1214 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1215 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1218 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1219 iv0
= get_iv (data
, *op0_p
);
1223 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1224 iv1
= get_iv (data
, *op1_p
);
1228 if (/* When comparing with non-invariant value, we may not do any senseful
1229 induction variable elimination. */
1231 /* Eliminating condition based on two ivs would be nontrivial.
1232 ??? TODO -- it is not really important to handle this case. */
1233 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1235 find_interesting_uses_op (data
, *op0_p
);
1236 find_interesting_uses_op (data
, *op1_p
);
1240 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1242 /* If both are invariants, this is a work for unswitching. */
1246 civ
= xmalloc (sizeof (struct iv
));
1247 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1248 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1251 /* Returns true if expression EXPR is obviously invariant in LOOP,
1252 i.e. if all its operands are defined outside of the LOOP. */
1255 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1260 if (is_gimple_min_invariant (expr
))
1263 if (TREE_CODE (expr
) == SSA_NAME
)
1265 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1267 && flow_bb_inside_loop_p (loop
, def_bb
))
1276 len
= first_rtl_op (TREE_CODE (expr
));
1277 for (i
= 0; i
< len
; i
++)
1278 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1284 /* Cumulates the steps of indices into DATA and replaces their values with the
1285 initial ones. Returns false when the value of the index cannot be determined.
1286 Callback for for_each_index. */
1288 struct ifs_ivopts_data
1290 struct ivopts_data
*ivopts_data
;
1296 idx_find_step (tree base
, tree
*idx
, void *data
)
1298 struct ifs_ivopts_data
*dta
= data
;
1300 tree step
, type
, iv_type
, iv_step
, lbound
, off
;
1301 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1303 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1304 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1307 /* If base is a component ref, require that the offset of the reference
1309 if (TREE_CODE (base
) == COMPONENT_REF
)
1311 off
= component_ref_field_offset (base
);
1312 return expr_invariant_in_loop_p (loop
, off
);
1315 /* If base is array, first check whether we will be able to move the
1316 reference out of the loop (in order to take its address in strength
1317 reduction). In order for this to work we need both lower bound
1318 and step to be loop invariants. */
1319 if (TREE_CODE (base
) == ARRAY_REF
)
1321 step
= array_ref_element_size (base
);
1322 lbound
= array_ref_low_bound (base
);
1324 if (!expr_invariant_in_loop_p (loop
, step
)
1325 || !expr_invariant_in_loop_p (loop
, lbound
))
1329 if (TREE_CODE (*idx
) != SSA_NAME
)
1332 iv
= get_iv (dta
->ivopts_data
, *idx
);
1341 iv_type
= TREE_TYPE (iv
->base
);
1342 type
= build_pointer_type (TREE_TYPE (base
));
1343 if (TREE_CODE (base
) == ARRAY_REF
)
1345 step
= array_ref_element_size (base
);
1347 /* We only handle addresses whose step is an integer constant. */
1348 if (TREE_CODE (step
) != INTEGER_CST
)
1352 /* The step for pointer arithmetics already is 1 byte. */
1353 step
= build_int_cst (type
, 1);
1355 if (TYPE_PRECISION (iv_type
) < TYPE_PRECISION (type
))
1356 iv_step
= can_count_iv_in_wider_type (dta
->ivopts_data
->current_loop
,
1357 type
, iv
->base
, iv
->step
, dta
->stmt
);
1359 iv_step
= fold_convert (iv_type
, iv
->step
);
1363 /* The index might wrap. */
1367 step
= EXEC_BINARY (MULT_EXPR
, type
, step
, iv_step
);
1370 *dta
->step_p
= step
;
1372 *dta
->step_p
= EXEC_BINARY (PLUS_EXPR
, type
, *dta
->step_p
, step
);
1377 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1378 object is passed to it in DATA. */
1381 idx_record_use (tree base
, tree
*idx
,
1384 find_interesting_uses_op (data
, *idx
);
1385 if (TREE_CODE (base
) == ARRAY_REF
)
1387 find_interesting_uses_op (data
, array_ref_element_size (base
));
1388 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1393 /* Finds addresses in *OP_P inside STMT. */
1396 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1398 tree base
= unshare_expr (*op_p
), step
= NULL
;
1400 struct ifs_ivopts_data ifs_ivopts_data
;
1402 /* Ignore bitfields for now. Not really something terribly complicated
1404 if (TREE_CODE (base
) == COMPONENT_REF
1405 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1)))
1408 ifs_ivopts_data
.ivopts_data
= data
;
1409 ifs_ivopts_data
.stmt
= stmt
;
1410 ifs_ivopts_data
.step_p
= &step
;
1411 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1415 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1416 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1418 if (TREE_CODE (base
) == INDIRECT_REF
)
1419 base
= TREE_OPERAND (base
, 0);
1421 base
= build_addr (base
);
1423 civ
= alloc_iv (base
, step
);
1424 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1428 for_each_index (op_p
, idx_record_use
, data
);
1431 /* Finds and records invariants used in STMT. */
1434 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1436 use_optype uses
= NULL
;
1440 if (TREE_CODE (stmt
) == PHI_NODE
)
1441 n
= PHI_NUM_ARGS (stmt
);
1444 get_stmt_operands (stmt
);
1445 uses
= STMT_USE_OPS (stmt
);
1446 n
= NUM_USES (uses
);
1449 for (i
= 0; i
< n
; i
++)
1451 if (TREE_CODE (stmt
) == PHI_NODE
)
1452 op
= PHI_ARG_DEF (stmt
, i
);
1454 op
= USE_OP (uses
, i
);
1456 record_invariant (data
, op
, false);
1460 /* Finds interesting uses of induction variables in the statement STMT. */
1463 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1467 use_optype uses
= NULL
;
1470 find_invariants_stmt (data
, stmt
);
1472 if (TREE_CODE (stmt
) == COND_EXPR
)
1474 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1478 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1480 lhs
= TREE_OPERAND (stmt
, 0);
1481 rhs
= TREE_OPERAND (stmt
, 1);
1483 if (TREE_CODE (lhs
) == SSA_NAME
)
1485 /* If the statement defines an induction variable, the uses are not
1486 interesting by themselves. */
1488 iv
= get_iv (data
, lhs
);
1490 if (iv
&& !zero_p (iv
->step
))
1494 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1496 case tcc_comparison
:
1497 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1501 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1502 if (REFERENCE_CLASS_P (lhs
))
1503 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1509 if (REFERENCE_CLASS_P (lhs
)
1510 && is_gimple_val (rhs
))
1512 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1513 find_interesting_uses_op (data
, rhs
);
1517 /* TODO -- we should also handle address uses of type
1519 memory = call (whatever);
1526 if (TREE_CODE (stmt
) == PHI_NODE
1527 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1529 lhs
= PHI_RESULT (stmt
);
1530 iv
= get_iv (data
, lhs
);
1532 if (iv
&& !zero_p (iv
->step
))
1536 if (TREE_CODE (stmt
) == PHI_NODE
)
1537 n
= PHI_NUM_ARGS (stmt
);
1540 uses
= STMT_USE_OPS (stmt
);
1541 n
= NUM_USES (uses
);
1544 for (i
= 0; i
< n
; i
++)
1546 if (TREE_CODE (stmt
) == PHI_NODE
)
1547 op
= PHI_ARG_DEF (stmt
, i
);
1549 op
= USE_OP (uses
, i
);
1551 if (TREE_CODE (op
) != SSA_NAME
)
1554 iv
= get_iv (data
, op
);
1558 find_interesting_uses_op (data
, op
);
1562 /* Finds interesting uses of induction variables outside of loops
1563 on loop exit edge EXIT. */
1566 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1570 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1572 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1573 find_interesting_uses_outer (data
, def
);
1577 /* Finds uses of the induction variables that are interesting. */
1580 find_interesting_uses (struct ivopts_data
*data
)
1583 block_stmt_iterator bsi
;
1585 basic_block
*body
= get_loop_body (data
->current_loop
);
1587 struct version_info
*info
;
1590 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1591 fprintf (dump_file
, "Uses:\n\n");
1593 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1598 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1599 if (e
->dest
!= EXIT_BLOCK_PTR
1600 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1601 find_interesting_uses_outside (data
, e
);
1603 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1604 find_interesting_uses_stmt (data
, phi
);
1605 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1606 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1609 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1613 fprintf (dump_file
, "\n");
1615 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1617 info
= ver_info (data
, i
);
1620 fprintf (dump_file
, " ");
1621 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1622 fprintf (dump_file
, " is invariant (%d)%s\n",
1623 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1627 fprintf (dump_file
, "\n");
1633 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1634 position to POS. If USE is not NULL, the candidate is set as related to
1635 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1636 replacement of the final value of the iv by a direct computation. */
1638 static struct iv_cand
*
1639 add_candidate_1 (struct ivopts_data
*data
,
1640 tree base
, tree step
, bool important
, enum iv_position pos
,
1641 struct iv_use
*use
, tree incremented_at
)
1644 struct iv_cand
*cand
= NULL
;
1649 type
= TREE_TYPE (base
);
1650 if (!TYPE_UNSIGNED (type
))
1652 type
= unsigned_type_for (type
);
1653 base
= fold_convert (type
, base
);
1655 step
= fold_convert (type
, step
);
1659 for (i
= 0; i
< n_iv_cands (data
); i
++)
1661 cand
= iv_cand (data
, i
);
1663 if (cand
->pos
!= pos
)
1666 if (cand
->incremented_at
!= incremented_at
)
1680 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1683 if (zero_p (cand
->iv
->step
))
1690 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
1695 if (i
== n_iv_cands (data
))
1697 cand
= xcalloc (1, sizeof (struct iv_cand
));
1703 cand
->iv
= alloc_iv (base
, step
);
1706 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
1708 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
1709 cand
->var_after
= cand
->var_before
;
1711 cand
->important
= important
;
1712 cand
->incremented_at
= incremented_at
;
1713 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_candidates
, cand
);
1715 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1716 dump_cand (dump_file
, cand
);
1719 if (important
&& !cand
->important
)
1721 cand
->important
= true;
1722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1723 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
1728 bitmap_set_bit (use
->related_cands
, i
);
1729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1730 fprintf (dump_file
, "Candidate %d is related to use %d\n",
1737 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1738 position to POS. If USE is not NULL, the candidate is set as related to
1739 it. The candidate computation is scheduled on all available positions. */
1742 add_candidate (struct ivopts_data
*data
,
1743 tree base
, tree step
, bool important
, struct iv_use
*use
)
1745 if (ip_normal_pos (data
->current_loop
))
1746 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
1747 if (ip_end_pos (data
->current_loop
))
1748 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
1751 /* Adds standard iv candidates. */
1754 add_standard_iv_candidates (struct ivopts_data
*data
)
1756 /* Add 0 + 1 * iteration candidate. */
1757 add_candidate (data
,
1758 build_int_cst (unsigned_intSI_type_node
, 0),
1759 build_int_cst (unsigned_intSI_type_node
, 1),
1762 /* The same for a long type if it is still fast enough. */
1763 if (BITS_PER_WORD
> 32)
1764 add_candidate (data
,
1765 build_int_cst (unsigned_intDI_type_node
, 0),
1766 build_int_cst (unsigned_intDI_type_node
, 1),
1771 /* Adds candidates bases on the old induction variable IV. */
1774 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
1777 struct iv_cand
*cand
;
1779 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
1781 /* The same, but with initial value zero. */
1782 add_candidate (data
,
1783 build_int_cst (TREE_TYPE (iv
->base
), 0),
1784 iv
->step
, true, NULL
);
1786 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
1787 if (TREE_CODE (phi
) == PHI_NODE
)
1789 /* Additionally record the possibility of leaving the original iv
1791 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
1792 cand
= add_candidate_1 (data
,
1793 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
1794 SSA_NAME_DEF_STMT (def
));
1795 cand
->var_before
= iv
->ssa_name
;
1796 cand
->var_after
= def
;
1800 /* Adds candidates based on the old induction variables. */
1803 add_old_ivs_candidates (struct ivopts_data
*data
)
1809 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1811 iv
= ver_info (data
, i
)->iv
;
1812 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
1813 add_old_iv_candidates (data
, iv
);
1817 /* Adds candidates based on the value of the induction variable IV and USE. */
1820 add_iv_value_candidates (struct ivopts_data
*data
,
1821 struct iv
*iv
, struct iv_use
*use
)
1823 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
1825 /* The same, but with initial value zero. */
1826 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
1827 iv
->step
, false, use
);
1830 /* Adds candidates based on the address IV and USE. */
1833 add_address_candidates (struct ivopts_data
*data
,
1834 struct iv
*iv
, struct iv_use
*use
)
1836 tree base
, abase
, tmp
, *act
;
1838 /* First, the trivial choices. */
1839 add_iv_value_candidates (data
, iv
, use
);
1841 /* Second, try removing the COMPONENT_REFs. */
1842 if (TREE_CODE (iv
->base
) == ADDR_EXPR
)
1844 base
= TREE_OPERAND (iv
->base
, 0);
1845 while (TREE_CODE (base
) == COMPONENT_REF
1846 || (TREE_CODE (base
) == ARRAY_REF
1847 && TREE_CODE (TREE_OPERAND (base
, 1)) == INTEGER_CST
))
1848 base
= TREE_OPERAND (base
, 0);
1850 if (base
!= TREE_OPERAND (iv
->base
, 0))
1852 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1853 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1855 if (TREE_CODE (base
) == INDIRECT_REF
)
1856 base
= TREE_OPERAND (base
, 0);
1858 base
= build_addr (base
);
1859 add_candidate (data
, base
, iv
->step
, false, use
);
1863 /* Third, try removing the constant offset. */
1865 while (TREE_CODE (abase
) == PLUS_EXPR
1866 && TREE_CODE (TREE_OPERAND (abase
, 1)) != INTEGER_CST
)
1867 abase
= TREE_OPERAND (abase
, 0);
1868 /* We found the offset, so make the copy of the non-shared part and
1870 if (TREE_CODE (abase
) == PLUS_EXPR
)
1875 for (tmp
= iv
->base
; tmp
!= abase
; tmp
= TREE_OPERAND (tmp
, 0))
1877 *act
= build2 (PLUS_EXPR
, TREE_TYPE (tmp
),
1878 NULL_TREE
, TREE_OPERAND (tmp
, 1));
1879 act
= &TREE_OPERAND (*act
, 0);
1881 *act
= TREE_OPERAND (tmp
, 0);
1883 add_candidate (data
, base
, iv
->step
, false, use
);
1887 /* Possibly adds pseudocandidate for replacing the final value of USE by
1888 a direct computation. */
1891 add_iv_outer_candidates (struct ivopts_data
*data
, struct iv_use
*use
)
1893 struct tree_niter_desc
*niter
;
1894 struct loop
*loop
= data
->current_loop
;
1896 /* We must know where we exit the loop and how many times does it roll. */
1897 if (!single_dom_exit (loop
))
1900 niter
= &loop_data (loop
)->niter
;
1902 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
1903 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
1906 add_candidate_1 (data
, NULL
, NULL
, false, IP_NORMAL
, use
, NULL_TREE
);
1909 /* Adds candidates based on the uses. */
1912 add_derived_ivs_candidates (struct ivopts_data
*data
)
1916 for (i
= 0; i
< n_iv_uses (data
); i
++)
1918 struct iv_use
*use
= iv_use (data
, i
);
1925 case USE_NONLINEAR_EXPR
:
1927 /* Just add the ivs based on the value of the iv used here. */
1928 add_iv_value_candidates (data
, use
->iv
, use
);
1932 add_iv_value_candidates (data
, use
->iv
, use
);
1934 /* Additionally, add the pseudocandidate for the possibility to
1935 replace the final value by a direct computation. */
1936 add_iv_outer_candidates (data
, use
);
1940 add_address_candidates (data
, use
->iv
, use
);
1949 /* Record important candidates and add them to related_cands bitmaps
1953 record_important_candidates (struct ivopts_data
*data
)
1958 for (i
= 0; i
< n_iv_cands (data
); i
++)
1960 struct iv_cand
*cand
= iv_cand (data
, i
);
1962 if (cand
->important
)
1963 bitmap_set_bit (data
->important_candidates
, i
);
1966 data
->consider_all_candidates
= (n_iv_cands (data
)
1967 <= CONSIDER_ALL_CANDIDATES_BOUND
);
1969 if (data
->consider_all_candidates
)
1971 /* We will not need "related_cands" bitmaps in this case,
1972 so release them to decrease peak memory consumption. */
1973 for (i
= 0; i
< n_iv_uses (data
); i
++)
1975 use
= iv_use (data
, i
);
1976 BITMAP_XFREE (use
->related_cands
);
1981 /* Add important candidates to the related_cands bitmaps. */
1982 for (i
= 0; i
< n_iv_uses (data
); i
++)
1983 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
1984 data
->important_candidates
);
1988 /* Finds the candidates for the induction variables. */
1991 find_iv_candidates (struct ivopts_data
*data
)
1993 /* Add commonly used ivs. */
1994 add_standard_iv_candidates (data
);
1996 /* Add old induction variables. */
1997 add_old_ivs_candidates (data
);
1999 /* Add induction variables derived from uses. */
2000 add_derived_ivs_candidates (data
);
2002 /* Record the important candidates. */
2003 record_important_candidates (data
);
2006 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2007 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2008 we allocate a simple list to every use. */
2011 alloc_use_cost_map (struct ivopts_data
*data
)
2013 unsigned i
, size
, s
, j
;
2015 for (i
= 0; i
< n_iv_uses (data
); i
++)
2017 struct iv_use
*use
= iv_use (data
, i
);
2020 if (data
->consider_all_candidates
)
2021 size
= n_iv_cands (data
);
2025 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2030 /* Round up to the power of two, so that moduling by it is fast. */
2031 for (size
= 1; size
< s
; size
<<= 1)
2035 use
->n_map_members
= size
;
2036 use
->cost_map
= xcalloc (size
, sizeof (struct cost_pair
));
2040 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2041 on invariants DEPENDS_ON. */
2044 set_use_iv_cost (struct ivopts_data
*data
,
2045 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2053 BITMAP_XFREE (depends_on
);
2057 if (data
->consider_all_candidates
)
2059 use
->cost_map
[cand
->id
].cand
= cand
;
2060 use
->cost_map
[cand
->id
].cost
= cost
;
2061 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2065 /* n_map_members is a power of two, so this computes modulo. */
2066 s
= cand
->id
& (use
->n_map_members
- 1);
2067 for (i
= s
; i
< use
->n_map_members
; i
++)
2068 if (!use
->cost_map
[i
].cand
)
2070 for (i
= 0; i
< s
; i
++)
2071 if (!use
->cost_map
[i
].cand
)
2077 use
->cost_map
[i
].cand
= cand
;
2078 use
->cost_map
[i
].cost
= cost
;
2079 use
->cost_map
[i
].depends_on
= depends_on
;
2082 /* Gets cost of (USE, CANDIDATE) pair. */
2084 static struct cost_pair
*
2085 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2086 struct iv_cand
*cand
)
2089 struct cost_pair
*ret
;
2094 if (data
->consider_all_candidates
)
2096 ret
= use
->cost_map
+ cand
->id
;
2103 /* n_map_members is a power of two, so this computes modulo. */
2104 s
= cand
->id
& (use
->n_map_members
- 1);
2105 for (i
= s
; i
< use
->n_map_members
; i
++)
2106 if (use
->cost_map
[i
].cand
== cand
)
2107 return use
->cost_map
+ i
;
2109 for (i
= 0; i
< s
; i
++)
2110 if (use
->cost_map
[i
].cand
== cand
)
2111 return use
->cost_map
+ i
;
2116 /* Returns estimate on cost of computing SEQ. */
2124 for (; seq
; seq
= NEXT_INSN (seq
))
2126 set
= single_set (seq
);
2128 cost
+= rtx_cost (set
, SET
);
2136 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2138 produce_memory_decl_rtl (tree obj
, int *regno
)
2143 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2145 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2146 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2149 x
= gen_raw_REG (Pmode
, (*regno
)++);
2151 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2154 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2155 walk_tree. DATA contains the actual fake register number. */
2158 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2160 tree obj
= NULL_TREE
;
2164 switch (TREE_CODE (*expr_p
))
2167 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2168 (handled_component_p (*expr_p
)
2169 || TREE_CODE (*expr_p
) == REALPART_EXPR
2170 || TREE_CODE (*expr_p
) == IMAGPART_EXPR
);
2171 expr_p
= &TREE_OPERAND (*expr_p
, 0));
2174 x
= produce_memory_decl_rtl (obj
, regno
);
2179 obj
= SSA_NAME_VAR (*expr_p
);
2180 if (!DECL_RTL_SET_P (obj
))
2181 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2190 if (DECL_RTL_SET_P (obj
))
2193 if (DECL_MODE (obj
) == BLKmode
)
2194 x
= produce_memory_decl_rtl (obj
, regno
);
2196 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2206 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset
, obj
);
2207 SET_DECL_RTL (obj
, x
);
2213 /* Determines cost of the computation of EXPR. */
2216 computation_cost (tree expr
)
2219 tree type
= TREE_TYPE (expr
);
2223 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2225 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2229 cost
= seq_cost (seq
);
2230 if (GET_CODE (rslt
) == MEM
)
2231 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2236 /* Returns variable containing the value of candidate CAND at statement AT. */
2239 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2241 if (stmt_after_increment (loop
, cand
, stmt
))
2242 return cand
->var_after
;
2244 return cand
->var_before
;
2247 /* Determines the expression by that USE is expressed from induction variable
2248 CAND at statement AT in LOOP. */
2251 get_computation_at (struct loop
*loop
,
2252 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
2254 tree ubase
= use
->iv
->base
;
2255 tree ustep
= use
->iv
->step
;
2256 tree cbase
= cand
->iv
->base
;
2257 tree cstep
= cand
->iv
->step
;
2258 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2262 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2263 HOST_WIDE_INT ratioi
;
2265 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2267 /* We do not have a precision to express the values of use. */
2271 expr
= var_at_stmt (loop
, cand
, at
);
2273 if (TREE_TYPE (expr
) != ctype
)
2275 /* This may happen with the original ivs. */
2276 expr
= fold_convert (ctype
, expr
);
2279 if (TYPE_UNSIGNED (utype
))
2283 uutype
= unsigned_type_for (utype
);
2284 ubase
= fold_convert (uutype
, ubase
);
2285 ustep
= fold_convert (uutype
, ustep
);
2288 if (uutype
!= ctype
)
2290 expr
= fold_convert (uutype
, expr
);
2291 cbase
= fold_convert (uutype
, cbase
);
2292 cstep
= fold_convert (uutype
, cstep
);
2295 if (!cst_and_fits_in_hwi (cstep
)
2296 || !cst_and_fits_in_hwi (ustep
))
2299 ustepi
= int_cst_value (ustep
);
2300 cstepi
= int_cst_value (cstep
);
2302 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2304 /* TODO maybe consider case when ustep divides cstep and the ratio is
2305 a power of 2 (so that the division is fast to execute)? We would
2306 need to be much more careful with overflows etc. then. */
2310 /* We may need to shift the value if we are after the increment. */
2311 if (stmt_after_increment (loop
, cand
, at
))
2312 cbase
= fold (build2 (PLUS_EXPR
, uutype
, cbase
, cstep
));
2314 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2315 or |ratio| == 1, it is better to handle this like
2317 ubase - ratio * cbase + ratio * var. */
2321 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, cbase
));
2322 expr
= fold (build2 (PLUS_EXPR
, uutype
, expr
, delta
));
2324 else if (ratioi
== -1)
2326 delta
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, cbase
));
2327 expr
= fold (build2 (MINUS_EXPR
, uutype
, delta
, expr
));
2329 else if (TREE_CODE (cbase
) == INTEGER_CST
)
2331 ratio
= build_int_cst_type (uutype
, ratioi
);
2332 delta
= fold (build2 (MULT_EXPR
, uutype
, ratio
, cbase
));
2333 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, delta
));
2334 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2335 expr
= fold (build2 (PLUS_EXPR
, uutype
, delta
, expr
));
2339 expr
= fold (build2 (MINUS_EXPR
, uutype
, expr
, cbase
));
2340 ratio
= build_int_cst_type (uutype
, ratioi
);
2341 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2342 expr
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, expr
));
2345 return fold_convert (utype
, expr
);
2348 /* Determines the expression by that USE is expressed from induction variable
2352 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2354 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2357 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2360 strip_offset (tree
*expr
, unsigned HOST_WIDE_INT
*offset
)
2363 enum tree_code code
;
2367 if (cst_and_fits_in_hwi (*expr
))
2369 *offset
+= int_cst_value (*expr
);
2370 *expr
= integer_zero_node
;
2374 code
= TREE_CODE (*expr
);
2376 if (code
!= PLUS_EXPR
&& code
!= MINUS_EXPR
)
2379 op0
= TREE_OPERAND (*expr
, 0);
2380 op1
= TREE_OPERAND (*expr
, 1);
2382 if (cst_and_fits_in_hwi (op1
))
2384 if (code
== PLUS_EXPR
)
2385 *offset
+= int_cst_value (op1
);
2387 *offset
-= int_cst_value (op1
);
2393 if (code
!= PLUS_EXPR
)
2396 if (!cst_and_fits_in_hwi (op0
))
2399 *offset
+= int_cst_value (op0
);
2404 /* Returns cost of addition in MODE. */
2407 add_cost (enum machine_mode mode
)
2409 static unsigned costs
[NUM_MACHINE_MODES
];
2417 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2418 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
),
2419 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
+ 1)),
2424 cost
= seq_cost (seq
);
2430 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2431 fprintf (dump_file
, "Addition in %s costs %d\n",
2432 GET_MODE_NAME (mode
), cost
);
2436 /* Entry in a hashtable of already known costs for multiplication. */
2439 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2440 enum machine_mode mode
; /* In mode. */
2441 unsigned cost
; /* The cost. */
2444 /* Counts hash value for the ENTRY. */
2447 mbc_entry_hash (const void *entry
)
2449 const struct mbc_entry
*e
= entry
;
2451 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2454 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2457 mbc_entry_eq (const void *entry1
, const void *entry2
)
2459 const struct mbc_entry
*e1
= entry1
;
2460 const struct mbc_entry
*e2
= entry2
;
2462 return (e1
->mode
== e2
->mode
2463 && e1
->cst
== e2
->cst
);
2466 /* Returns cost of multiplication by constant CST in MODE. */
2469 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
2471 static htab_t costs
;
2472 struct mbc_entry
**cached
, act
;
2477 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2481 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
2483 return (*cached
)->cost
;
2485 *cached
= xmalloc (sizeof (struct mbc_entry
));
2486 (*cached
)->mode
= mode
;
2487 (*cached
)->cst
= cst
;
2490 expand_mult (mode
, gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
), GEN_INT (cst
),
2495 cost
= seq_cost (seq
);
2497 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2498 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
2499 (int) cst
, GET_MODE_NAME (mode
), cost
);
2501 (*cached
)->cost
= cost
;
2506 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2507 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2508 variable is omitted. The created memory accesses MODE.
2510 TODO -- there must be some better way. This all is quite crude. */
2513 get_address_cost (bool symbol_present
, bool var_present
,
2514 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
2516 #define MAX_RATIO 128
2517 static sbitmap valid_mult
;
2518 static HOST_WIDE_INT rat
, off
;
2519 static HOST_WIDE_INT min_offset
, max_offset
;
2520 static unsigned costs
[2][2][2][2];
2521 unsigned cost
, acost
;
2522 rtx seq
, addr
, base
;
2523 bool offset_p
, ratio_p
;
2525 HOST_WIDE_INT s_offset
;
2526 unsigned HOST_WIDE_INT mask
;
2533 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2535 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
2536 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2538 XEXP (addr
, 1) = GEN_INT (i
);
2539 if (!memory_address_p (Pmode
, addr
))
2542 max_offset
= i
>> 1;
2545 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2547 XEXP (addr
, 1) = GEN_INT (-i
);
2548 if (!memory_address_p (Pmode
, addr
))
2551 min_offset
= -(i
>> 1);
2553 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2555 fprintf (dump_file
, "get_address_cost:\n");
2556 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
2557 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
2560 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
2561 sbitmap_zero (valid_mult
);
2563 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
2564 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2566 XEXP (addr
, 1) = GEN_INT (i
);
2567 if (memory_address_p (Pmode
, addr
))
2569 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
2574 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2576 fprintf (dump_file
, " allowed multipliers:");
2577 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2578 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
2579 fprintf (dump_file
, " %d", (int) i
);
2580 fprintf (dump_file
, "\n");
2581 fprintf (dump_file
, "\n");
2585 bits
= GET_MODE_BITSIZE (Pmode
);
2586 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
2588 if ((offset
>> (bits
- 1) & 1))
2593 offset_p
= (s_offset
!= 0
2594 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
2595 ratio_p
= (ratio
!= 1
2596 && -MAX_RATIO
<= ratio
&& ratio
<= MAX_RATIO
2597 && TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
));
2599 if (ratio
!= 1 && !ratio_p
)
2600 cost
+= multiply_by_cost (ratio
, Pmode
);
2602 if (s_offset
&& !offset_p
&& !symbol_present
)
2604 cost
+= add_cost (Pmode
);
2608 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
2613 addr
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2614 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
+ 1);
2616 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, GEN_INT (rat
));
2619 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
2623 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
2625 base
= gen_rtx_fmt_e (CONST
, Pmode
,
2626 gen_rtx_fmt_ee (PLUS
, Pmode
,
2631 base
= GEN_INT (off
);
2636 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
2639 addr
= memory_address (Pmode
, addr
);
2643 acost
= seq_cost (seq
);
2644 acost
+= address_cost (addr
, Pmode
);
2648 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
2651 return cost
+ acost
;
2654 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2655 the bitmap to that we should store it. */
2657 static struct ivopts_data
*fd_ivopts_data
;
2659 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2661 bitmap
*depends_on
= data
;
2662 struct version_info
*info
;
2664 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2666 info
= name_info (fd_ivopts_data
, *expr_p
);
2668 if (!info
->inv_id
|| info
->has_nonlin_use
)
2672 *depends_on
= BITMAP_XMALLOC ();
2673 bitmap_set_bit (*depends_on
, info
->inv_id
);
2678 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2679 invariants the computation depends on. */
2682 force_var_cost (struct ivopts_data
*data
,
2683 tree expr
, bitmap
*depends_on
)
2685 static bool costs_initialized
= false;
2686 static unsigned integer_cost
;
2687 static unsigned symbol_cost
;
2688 static unsigned address_cost
;
2690 unsigned cost0
, cost1
, cost
;
2691 enum machine_mode mode
;
2693 if (!costs_initialized
)
2695 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
2696 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
2697 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
2699 tree type
= build_pointer_type (integer_type_node
);
2701 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
2704 SET_DECL_RTL (var
, x
);
2705 TREE_STATIC (var
) = 1;
2706 addr
= build1 (ADDR_EXPR
, type
, var
);
2707 symbol_cost
= computation_cost (addr
) + 1;
2710 = computation_cost (build2 (PLUS_EXPR
, type
,
2712 build_int_cst_type (type
, 2000))) + 1;
2713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2715 fprintf (dump_file
, "force_var_cost:\n");
2716 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
2717 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
2718 fprintf (dump_file
, " address %d\n", (int) address_cost
);
2719 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
2720 fprintf (dump_file
, "\n");
2723 costs_initialized
= true;
2728 fd_ivopts_data
= data
;
2729 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
2732 if (SSA_VAR_P (expr
))
2735 if (TREE_INVARIANT (expr
))
2737 if (TREE_CODE (expr
) == INTEGER_CST
)
2738 return integer_cost
;
2740 if (TREE_CODE (expr
) == ADDR_EXPR
)
2742 tree obj
= TREE_OPERAND (expr
, 0);
2744 if (TREE_CODE (obj
) == VAR_DECL
2745 || TREE_CODE (obj
) == PARM_DECL
2746 || TREE_CODE (obj
) == RESULT_DECL
)
2750 return address_cost
;
2753 switch (TREE_CODE (expr
))
2758 op0
= TREE_OPERAND (expr
, 0);
2759 op1
= TREE_OPERAND (expr
, 1);
2761 if (is_gimple_val (op0
))
2764 cost0
= force_var_cost (data
, op0
, NULL
);
2766 if (is_gimple_val (op1
))
2769 cost1
= force_var_cost (data
, op1
, NULL
);
2774 /* Just an arbitrary value, FIXME. */
2775 return target_spill_cost
;
2778 mode
= TYPE_MODE (TREE_TYPE (expr
));
2779 switch (TREE_CODE (expr
))
2783 cost
= add_cost (mode
);
2787 if (cst_and_fits_in_hwi (op0
))
2788 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
2789 else if (cst_and_fits_in_hwi (op1
))
2790 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
2792 return target_spill_cost
;
2802 /* Bound the cost by target_spill_cost. The parts of complicated
2803 computations often are either loop invariant or at least can
2804 be shared between several iv uses, so letting this grow without
2805 limits would not give reasonable results. */
2806 return cost
< target_spill_cost
? cost
: target_spill_cost
;
2809 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2810 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2811 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2812 invariants the computation depends on. */
2815 split_address_cost (struct ivopts_data
*data
,
2816 tree addr
, bool *symbol_present
, bool *var_present
,
2817 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2820 HOST_WIDE_INT bitsize
;
2821 HOST_WIDE_INT bitpos
;
2823 enum machine_mode mode
;
2824 int unsignedp
, volatilep
;
2826 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
2827 &unsignedp
, &volatilep
);
2830 || bitpos
% BITS_PER_UNIT
!= 0
2831 || TREE_CODE (core
) != VAR_DECL
)
2833 *symbol_present
= false;
2834 *var_present
= true;
2835 fd_ivopts_data
= data
;
2836 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
2837 return target_spill_cost
;
2840 *offset
+= bitpos
/ BITS_PER_UNIT
;
2841 if (TREE_STATIC (core
)
2842 || DECL_EXTERNAL (core
))
2844 *symbol_present
= true;
2845 *var_present
= false;
2849 *symbol_present
= false;
2850 *var_present
= true;
2854 /* Estimates cost of expressing difference of addresses E1 - E2 as
2855 var + symbol + offset. The value of offset is added to OFFSET,
2856 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2857 part is missing. DEPENDS_ON is a set of the invariants the computation
2861 ptr_difference_cost (struct ivopts_data
*data
,
2862 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2863 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2865 HOST_WIDE_INT diff
= 0;
2868 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
2870 if (ptr_difference_const (e1
, e2
, &diff
))
2873 *symbol_present
= false;
2874 *var_present
= false;
2878 if (e2
== integer_zero_node
)
2879 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
2880 symbol_present
, var_present
, offset
, depends_on
);
2882 *symbol_present
= false;
2883 *var_present
= true;
2885 cost
= force_var_cost (data
, e1
, depends_on
);
2886 cost
+= force_var_cost (data
, e2
, depends_on
);
2887 cost
+= add_cost (Pmode
);
2892 /* Estimates cost of expressing difference E1 - E2 as
2893 var + symbol + offset. The value of offset is added to OFFSET,
2894 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2895 part is missing. DEPENDS_ON is a set of the invariants the computation
2899 difference_cost (struct ivopts_data
*data
,
2900 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2901 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2904 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
2906 strip_offset (&e1
, offset
);
2908 strip_offset (&e2
, offset
);
2911 if (TREE_CODE (e1
) == ADDR_EXPR
)
2912 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
2914 *symbol_present
= false;
2916 if (operand_equal_p (e1
, e2
, 0))
2918 *var_present
= false;
2921 *var_present
= true;
2923 return force_var_cost (data
, e1
, depends_on
);
2927 cost
= force_var_cost (data
, e2
, depends_on
);
2928 cost
+= multiply_by_cost (-1, mode
);
2933 cost
= force_var_cost (data
, e1
, depends_on
);
2934 cost
+= force_var_cost (data
, e2
, depends_on
);
2935 cost
+= add_cost (mode
);
2940 /* Determines the cost of the computation by that USE is expressed
2941 from induction variable CAND. If ADDRESS_P is true, we just need
2942 to create an address from it, otherwise we want to get it into
2943 register. A set of invariants we depend on is stored in
2944 DEPENDS_ON. AT is the statement at that the value is computed. */
2947 get_computation_cost_at (struct ivopts_data
*data
,
2948 struct iv_use
*use
, struct iv_cand
*cand
,
2949 bool address_p
, bitmap
*depends_on
, tree at
)
2951 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
2953 tree utype
= TREE_TYPE (ubase
), ctype
;
2954 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
2955 HOST_WIDE_INT ratio
, aratio
;
2956 bool var_present
, symbol_present
;
2957 unsigned cost
= 0, n_sums
;
2961 /* Only consider real candidates. */
2965 cbase
= cand
->iv
->base
;
2966 cstep
= cand
->iv
->step
;
2967 ctype
= TREE_TYPE (cbase
);
2969 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2971 /* We do not have a precision to express the values of use. */
2977 /* Do not try to express address of an object with computation based
2978 on address of a different object. This may cause problems in rtl
2979 level alias analysis (that does not expect this to be happening,
2980 as this is illegal in C), and would be unlikely to be useful
2982 if (use
->iv
->base_object
2983 && cand
->iv
->base_object
2984 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
2988 if (!cst_and_fits_in_hwi (ustep
)
2989 || !cst_and_fits_in_hwi (cstep
))
2992 if (TREE_CODE (ubase
) == INTEGER_CST
2993 && !cst_and_fits_in_hwi (ubase
))
2996 if (TREE_CODE (cbase
) == INTEGER_CST
2997 && !cst_and_fits_in_hwi (cbase
))
3000 ustepi
= int_cst_value (ustep
);
3001 cstepi
= int_cst_value (cstep
);
3003 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3005 /* TODO -- add direct handling of this case. */
3009 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3012 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3013 or ratio == 1, it is better to handle this like
3015 ubase - ratio * cbase + ratio * var
3017 (also holds in the case ratio == -1, TODO. */
3019 if (TREE_CODE (cbase
) == INTEGER_CST
)
3021 offset
= - ratio
* int_cst_value (cbase
);
3022 cost
+= difference_cost (data
,
3023 ubase
, integer_zero_node
,
3024 &symbol_present
, &var_present
, &offset
,
3027 else if (ratio
== 1)
3029 cost
+= difference_cost (data
,
3031 &symbol_present
, &var_present
, &offset
,
3036 cost
+= force_var_cost (data
, cbase
, depends_on
);
3037 cost
+= add_cost (TYPE_MODE (ctype
));
3038 cost
+= difference_cost (data
,
3039 ubase
, integer_zero_node
,
3040 &symbol_present
, &var_present
, &offset
,
3044 /* If we are after the increment, the value of the candidate is higher by
3046 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3047 offset
-= ratio
* cstepi
;
3049 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3050 (symbol/var/const parts may be omitted). If we are looking for an address,
3051 find the cost of addressing this. */
3053 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3055 /* Otherwise estimate the costs for computing the expression. */
3056 aratio
= ratio
> 0 ? ratio
: -ratio
;
3057 if (!symbol_present
&& !var_present
&& !offset
)
3060 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3066 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3070 /* Symbol + offset should be compile-time computable. */
3071 && (symbol_present
|| offset
))
3074 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3078 /* Just get the expression, expand it and measure the cost. */
3079 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3085 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3087 return computation_cost (comp
);
3091 /* Determines the cost of the computation by that USE is expressed
3092 from induction variable CAND. If ADDRESS_P is true, we just need
3093 to create an address from it, otherwise we want to get it into
3094 register. A set of invariants we depend on is stored in
3098 get_computation_cost (struct ivopts_data
*data
,
3099 struct iv_use
*use
, struct iv_cand
*cand
,
3100 bool address_p
, bitmap
*depends_on
)
3102 return get_computation_cost_at (data
,
3103 use
, cand
, address_p
, depends_on
, use
->stmt
);
3106 /* Determines cost of basing replacement of USE on CAND in a generic
3110 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3111 struct iv_use
*use
, struct iv_cand
*cand
)
3114 unsigned cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3116 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3118 return cost
!= INFTY
;
3121 /* Determines cost of basing replacement of USE on CAND in an address. */
3124 determine_use_iv_cost_address (struct ivopts_data
*data
,
3125 struct iv_use
*use
, struct iv_cand
*cand
)
3128 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3130 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3132 return cost
!= INFTY
;
3135 /* Computes value of induction variable IV in iteration NITER. */
3138 iv_value (struct iv
*iv
, tree niter
)
3141 tree type
= TREE_TYPE (iv
->base
);
3143 niter
= fold_convert (type
, niter
);
3144 val
= fold (build2 (MULT_EXPR
, type
, iv
->step
, niter
));
3146 return fold (build2 (PLUS_EXPR
, type
, iv
->base
, val
));
3149 /* Computes value of candidate CAND at position AT in iteration NITER. */
3152 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3154 tree val
= iv_value (cand
->iv
, niter
);
3155 tree type
= TREE_TYPE (cand
->iv
->base
);
3157 if (stmt_after_increment (loop
, cand
, at
))
3158 val
= fold (build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
));
3163 /* Check whether it is possible to express the condition in USE by comparison
3164 of candidate CAND. If so, store the comparison code to COMPARE and the
3165 value compared with to BOUND. */
3168 may_eliminate_iv (struct loop
*loop
,
3169 struct iv_use
*use
, struct iv_cand
*cand
,
3170 enum tree_code
*compare
, tree
*bound
)
3174 struct tree_niter_desc niter
, new_niter
;
3175 tree wider_type
, type
, base
;
3177 /* For now works only for exits that dominate the loop latch. TODO -- extend
3178 for other conditions inside loop body. */
3179 ex_bb
= bb_for_stmt (use
->stmt
);
3180 if (use
->stmt
!= last_stmt (ex_bb
)
3181 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3183 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3186 exit
= EDGE_SUCC (ex_bb
, 0);
3187 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3188 exit
= EDGE_SUCC (ex_bb
, 1);
3189 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3192 niter
.niter
= NULL_TREE
;
3193 number_of_iterations_exit (loop
, exit
, &niter
);
3195 || !integer_nonzerop (niter
.assumptions
)
3196 || !integer_zerop (niter
.may_be_zero
))
3199 if (exit
->flags
& EDGE_TRUE_VALUE
)
3204 *bound
= cand_value_at (loop
, cand
, use
->stmt
, niter
.niter
);
3206 /* Let us check there is not some problem with overflows, by checking that
3207 the number of iterations is unchanged. */
3208 base
= cand
->iv
->base
;
3209 type
= TREE_TYPE (base
);
3210 if (stmt_after_increment (loop
, cand
, use
->stmt
))
3211 base
= fold (build2 (PLUS_EXPR
, type
, base
, cand
->iv
->step
));
3213 new_niter
.niter
= NULL_TREE
;
3214 number_of_iterations_cond (TREE_TYPE (cand
->iv
->base
), base
,
3215 cand
->iv
->step
, NE_EXPR
, *bound
, NULL_TREE
,
3217 if (!new_niter
.niter
3218 || !integer_nonzerop (new_niter
.assumptions
)
3219 || !integer_zerop (new_niter
.may_be_zero
))
3222 wider_type
= TREE_TYPE (new_niter
.niter
);
3223 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (TREE_TYPE (niter
.niter
)))
3224 wider_type
= TREE_TYPE (niter
.niter
);
3225 if (!operand_equal_p (fold_convert (wider_type
, niter
.niter
),
3226 fold_convert (wider_type
, new_niter
.niter
), 0))
3232 /* Determines cost of basing replacement of USE on CAND in a condition. */
3235 determine_use_iv_cost_condition (struct ivopts_data
*data
,
3236 struct iv_use
*use
, struct iv_cand
*cand
)
3239 enum tree_code compare
;
3241 /* Only consider real candidates. */
3244 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3248 if (may_eliminate_iv (data
->current_loop
, use
, cand
, &compare
, &bound
))
3250 bitmap depends_on
= NULL
;
3251 unsigned cost
= force_var_cost (data
, bound
, &depends_on
);
3253 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3254 return cost
!= INFTY
;
3257 /* The induction variable elimination failed; just express the original
3258 giv. If it is compared with an invariant, note that we cannot get
3260 if (TREE_CODE (*use
->op_p
) == SSA_NAME
)
3261 record_invariant (data
, *use
->op_p
, true);
3264 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 0), true);
3265 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 1), true);
3268 return determine_use_iv_cost_generic (data
, use
, cand
);
3271 /* Checks whether it is possible to replace the final value of USE by
3272 a direct computation. If so, the formula is stored to *VALUE. */
3275 may_replace_final_value (struct loop
*loop
, struct iv_use
*use
, tree
*value
)
3278 struct tree_niter_desc
*niter
;
3280 exit
= single_dom_exit (loop
);
3284 gcc_assert (dominated_by_p (CDI_DOMINATORS
, exit
->src
,
3285 bb_for_stmt (use
->stmt
)));
3287 niter
= &loop_data (loop
)->niter
;
3289 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
3290 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
3293 *value
= iv_value (use
->iv
, niter
->niter
);
3298 /* Determines cost of replacing final value of USE using CAND. */
3301 determine_use_iv_cost_outer (struct ivopts_data
*data
,
3302 struct iv_use
*use
, struct iv_cand
*cand
)
3308 struct loop
*loop
= data
->current_loop
;
3312 if (!may_replace_final_value (loop
, use
, &value
))
3314 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3319 cost
= force_var_cost (data
, value
, &depends_on
);
3321 cost
/= AVG_LOOP_NITER (loop
);
3323 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3324 return cost
!= INFTY
;
3327 exit
= single_dom_exit (loop
);
3330 /* If there is just a single exit, we may use value of the candidate
3331 after we take it to determine the value of use. */
3332 cost
= get_computation_cost_at (data
, use
, cand
, false, &depends_on
,
3333 last_stmt (exit
->src
));
3335 cost
/= AVG_LOOP_NITER (loop
);
3339 /* Otherwise we just need to compute the iv. */
3340 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3343 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3345 return cost
!= INFTY
;
3348 /* Determines cost of basing replacement of USE on CAND. Returns false
3349 if USE cannot be based on CAND. */
3352 determine_use_iv_cost (struct ivopts_data
*data
,
3353 struct iv_use
*use
, struct iv_cand
*cand
)
3357 case USE_NONLINEAR_EXPR
:
3358 return determine_use_iv_cost_generic (data
, use
, cand
);
3361 return determine_use_iv_cost_outer (data
, use
, cand
);
3364 return determine_use_iv_cost_address (data
, use
, cand
);
3367 return determine_use_iv_cost_condition (data
, use
, cand
);
3374 /* Determines costs of basing the use of the iv on an iv candidate. */
3377 determine_use_iv_costs (struct ivopts_data
*data
)
3381 struct iv_cand
*cand
;
3382 bitmap to_clear
= BITMAP_XMALLOC ();
3384 alloc_use_cost_map (data
);
3386 for (i
= 0; i
< n_iv_uses (data
); i
++)
3388 use
= iv_use (data
, i
);
3390 if (data
->consider_all_candidates
)
3392 for (j
= 0; j
< n_iv_cands (data
); j
++)
3394 cand
= iv_cand (data
, j
);
3395 determine_use_iv_cost (data
, use
, cand
);
3402 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
3404 cand
= iv_cand (data
, j
);
3405 if (!determine_use_iv_cost (data
, use
, cand
))
3406 bitmap_set_bit (to_clear
, j
);
3409 /* Remove the candidates for that the cost is infinite from
3410 the list of related candidates. */
3411 bitmap_and_compl_into (use
->related_cands
, to_clear
);
3412 bitmap_clear (to_clear
);
3416 BITMAP_XFREE (to_clear
);
3418 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3420 fprintf (dump_file
, "Use-candidate costs:\n");
3422 for (i
= 0; i
< n_iv_uses (data
); i
++)
3424 use
= iv_use (data
, i
);
3426 fprintf (dump_file
, "Use %d:\n", i
);
3427 fprintf (dump_file
, " cand\tcost\tdepends on\n");
3428 for (j
= 0; j
< use
->n_map_members
; j
++)
3430 if (!use
->cost_map
[j
].cand
3431 || use
->cost_map
[j
].cost
== INFTY
)
3434 fprintf (dump_file
, " %d\t%d\t",
3435 use
->cost_map
[j
].cand
->id
,
3436 use
->cost_map
[j
].cost
);
3437 if (use
->cost_map
[j
].depends_on
)
3438 bitmap_print (dump_file
,
3439 use
->cost_map
[j
].depends_on
, "","");
3440 fprintf (dump_file
, "\n");
3443 fprintf (dump_file
, "\n");
3445 fprintf (dump_file
, "\n");
3449 /* Determines cost of the candidate CAND. */
3452 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
3454 unsigned cost_base
, cost_step
;
3464 /* There are two costs associated with the candidate -- its increment
3465 and its initialization. The second is almost negligible for any loop
3466 that rolls enough, so we take it just very little into account. */
3468 base
= cand
->iv
->base
;
3469 cost_base
= force_var_cost (data
, base
, NULL
);
3470 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
3472 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
3474 /* Prefer the original iv unless we may gain something by replacing it. */
3475 if (cand
->pos
== IP_ORIGINAL
)
3478 /* Prefer not to insert statements into latch unless there are some
3479 already (so that we do not create unnecessary jumps). */
3480 if (cand
->pos
== IP_END
)
3482 bb
= ip_end_pos (data
->current_loop
);
3483 last
= last_stmt (bb
);
3486 || TREE_CODE (last
) == LABEL_EXPR
)
3491 /* Determines costs of computation of the candidates. */
3494 determine_iv_costs (struct ivopts_data
*data
)
3498 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3500 fprintf (dump_file
, "Candidate costs:\n");
3501 fprintf (dump_file
, " cand\tcost\n");
3504 for (i
= 0; i
< n_iv_cands (data
); i
++)
3506 struct iv_cand
*cand
= iv_cand (data
, i
);
3508 determine_iv_cost (data
, cand
);
3510 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3511 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
3514 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3515 fprintf (dump_file
, "\n");
3518 /* Calculates cost for having SIZE induction variables. */
3521 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
3523 return global_cost_for_size (size
,
3524 loop_data (data
->current_loop
)->regs_used
,
3528 /* For each size of the induction variable set determine the penalty. */
3531 determine_set_costs (struct ivopts_data
*data
)
3535 struct loop
*loop
= data
->current_loop
;
3538 /* We use the following model (definitely improvable, especially the
3539 cost function -- TODO):
3541 We estimate the number of registers available (using MD data), name it A.
3543 We estimate the number of registers used by the loop, name it U. This
3544 number is obtained as the number of loop phi nodes (not counting virtual
3545 registers and bivs) + the number of variables from outside of the loop.
3547 We set a reserve R (free regs that are used for temporary computations,
3548 etc.). For now the reserve is a constant 3.
3550 Let I be the number of induction variables.
3552 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3553 make a lot of ivs without a reason).
3554 -- if A - R < U + I <= A, the cost is I * PRES_COST
3555 -- if U + I > A, the cost is I * PRES_COST and
3556 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3558 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3560 fprintf (dump_file
, "Global costs:\n");
3561 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
3562 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
3563 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
3564 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
3568 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
3570 op
= PHI_RESULT (phi
);
3572 if (!is_gimple_reg (op
))
3575 if (get_iv (data
, op
))
3581 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
3583 struct version_info
*info
= ver_info (data
, j
);
3585 if (info
->inv_id
&& info
->has_nonlin_use
)
3589 loop_data (loop
)->regs_used
= n
;
3590 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3591 fprintf (dump_file
, " regs_used %d\n", n
);
3593 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3595 fprintf (dump_file
, " cost for size:\n");
3596 fprintf (dump_file
, " ivs\tcost\n");
3597 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
3598 fprintf (dump_file
, " %d\t%d\n", j
,
3599 ivopts_global_cost_for_size (data
, j
));
3600 fprintf (dump_file
, "\n");
3604 /* Returns true if A is a cheaper cost pair than B. */
3607 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
3615 if (a
->cost
< b
->cost
)
3618 if (a
->cost
> b
->cost
)
3621 /* In case the costs are the same, prefer the cheaper candidate. */
3622 if (a
->cand
->cost
< b
->cand
->cost
)
3628 /* Computes the cost field of IVS structure. */
3631 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
3635 cost
+= ivs
->cand_use_cost
;
3636 cost
+= ivs
->cand_cost
;
3637 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
3642 /* Set USE not to be expressed by any candidate in IVS. */
3645 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3648 unsigned uid
= use
->id
, cid
, iid
;
3650 struct cost_pair
*cp
;
3653 cp
= ivs
->cand_for_use
[uid
];
3659 ivs
->cand_for_use
[uid
] = NULL
;
3660 ivs
->n_cand_uses
[cid
]--;
3662 if (ivs
->n_cand_uses
[cid
] == 0)
3664 bitmap_clear_bit (ivs
->cands
, cid
);
3665 /* Do not count the pseudocandidates. */
3668 ivs
->cand_cost
-= cp
->cand
->cost
;
3671 ivs
->cand_use_cost
-= cp
->cost
;
3673 deps
= cp
->depends_on
;
3677 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3679 ivs
->n_invariant_uses
[iid
]--;
3680 if (ivs
->n_invariant_uses
[iid
] == 0)
3685 iv_ca_recount_cost (data
, ivs
);
3688 /* Set cost pair for USE in set IVS to CP. */
3691 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3692 struct iv_use
*use
, struct cost_pair
*cp
)
3694 unsigned uid
= use
->id
, cid
, iid
;
3698 if (ivs
->cand_for_use
[uid
] == cp
)
3701 if (ivs
->cand_for_use
[uid
])
3702 iv_ca_set_no_cp (data
, ivs
, use
);
3709 ivs
->cand_for_use
[uid
] = cp
;
3710 ivs
->n_cand_uses
[cid
]++;
3711 if (ivs
->n_cand_uses
[cid
] == 1)
3713 bitmap_set_bit (ivs
->cands
, cid
);
3714 /* Do not count the pseudocandidates. */
3717 ivs
->cand_cost
+= cp
->cand
->cost
;
3720 ivs
->cand_use_cost
+= cp
->cost
;
3722 deps
= cp
->depends_on
;
3726 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3728 ivs
->n_invariant_uses
[iid
]++;
3729 if (ivs
->n_invariant_uses
[iid
] == 1)
3734 iv_ca_recount_cost (data
, ivs
);
3738 /* Extend set IVS by expressing USE by some of the candidates in it
3742 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3745 struct cost_pair
*best_cp
= NULL
, *cp
;
3749 gcc_assert (ivs
->upto
>= use
->id
);
3751 if (ivs
->upto
== use
->id
)
3757 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
3759 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
3761 if (cheaper_cost_pair (cp
, best_cp
))
3765 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
3768 /* Get cost for assignment IVS. */
3771 iv_ca_cost (struct iv_ca
*ivs
)
3773 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
3776 /* Returns true if all dependences of CP are among invariants in IVS. */
3779 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
3784 if (!cp
->depends_on
)
3787 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
3789 if (ivs
->n_invariant_uses
[i
] == 0)
3796 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3797 it before NEXT_CHANGE. */
3799 static struct iv_ca_delta
*
3800 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
3801 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
3803 struct iv_ca_delta
*change
= xmalloc (sizeof (struct iv_ca_delta
));
3806 change
->old_cp
= old_cp
;
3807 change
->new_cp
= new_cp
;
3808 change
->next_change
= next_change
;
3813 /* Returns candidate by that USE is expressed in IVS. */
3815 static struct cost_pair
*
3816 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
3818 return ivs
->cand_for_use
[use
->id
];
3821 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3822 reverted instead. */
3825 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3826 struct iv_ca_delta
*delta
, bool forward
)
3828 struct cost_pair
*from
, *to
;
3830 for (; delta
; delta
= delta
->next_change
)
3834 from
= delta
->old_cp
;
3839 from
= delta
->new_cp
;
3843 gcc_assert (iv_ca_cand_for_use (ivs
, delta
->use
) == from
);
3844 iv_ca_set_cp (data
, ivs
, delta
->use
, to
);
3848 /* Returns true if CAND is used in IVS. */
3851 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
3853 return ivs
->n_cand_uses
[cand
->id
] > 0;
3856 /* Free the list of changes DELTA. */
3859 iv_ca_delta_free (struct iv_ca_delta
**delta
)
3861 struct iv_ca_delta
*act
, *next
;
3863 for (act
= *delta
; act
; act
= next
)
3865 next
= act
->next_change
;
3872 /* Allocates new iv candidates assignment. */
3874 static struct iv_ca
*
3875 iv_ca_new (struct ivopts_data
*data
)
3877 struct iv_ca
*nw
= xmalloc (sizeof (struct iv_ca
));
3881 nw
->cand_for_use
= xcalloc (n_iv_uses (data
), sizeof (struct cost_pair
*));
3882 nw
->n_cand_uses
= xcalloc (n_iv_cands (data
), sizeof (unsigned));
3883 nw
->cands
= BITMAP_XMALLOC ();
3885 nw
->cand_use_cost
= 0;
3887 nw
->n_invariant_uses
= xcalloc (data
->max_inv_id
+ 1, sizeof (unsigned));
3893 /* Free memory occupied by the set IVS. */
3896 iv_ca_free (struct iv_ca
**ivs
)
3898 free ((*ivs
)->cand_for_use
);
3899 free ((*ivs
)->n_cand_uses
);
3900 BITMAP_XFREE ((*ivs
)->cands
);
3901 free ((*ivs
)->n_invariant_uses
);
3906 /* Dumps IVS to FILE. */
3909 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
3911 const char *pref
= " invariants ";
3914 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
3915 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
3917 for (i
= 1; i
<= data
->max_inv_id
; i
++)
3918 if (ivs
->n_invariant_uses
[i
])
3920 fprintf (file
, "%s%d", pref
, i
);
3923 fprintf (file
, "\n");
3926 /* Try changing candidate in IVS to CAND for each use. Return cost of the
3927 new set, and store differences in DELTA. */
3930 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3931 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
3935 struct cost_pair
*old_cp
, *new_cp
;
3938 for (i
= 0; i
< ivs
->upto
; i
++)
3940 use
= iv_use (data
, i
);
3941 old_cp
= iv_ca_cand_for_use (ivs
, use
);
3944 && old_cp
->cand
== cand
)
3947 new_cp
= get_use_iv_cost (data
, use
, cand
);
3951 if (!iv_ca_has_deps (ivs
, new_cp
))
3954 if (!cheaper_cost_pair (new_cp
, old_cp
))
3957 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
3960 iv_ca_delta_commit (data
, ivs
, *delta
, true);
3961 cost
= iv_ca_cost (ivs
);
3962 iv_ca_delta_commit (data
, ivs
, *delta
, false);
3967 /* Try narrowing set IVS by removing CAND. Return the cost of
3968 the new set and store the differences in DELTA. */
3971 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3972 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
3976 struct cost_pair
*old_cp
, *new_cp
, *cp
;
3978 struct iv_cand
*cnd
;
3982 for (i
= 0; i
< n_iv_uses (data
); i
++)
3984 use
= iv_use (data
, i
);
3986 old_cp
= iv_ca_cand_for_use (ivs
, use
);
3987 if (old_cp
->cand
!= cand
)
3992 if (data
->consider_all_candidates
)
3994 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
3999 cnd
= iv_cand (data
, ci
);
4001 cp
= get_use_iv_cost (data
, use
, cnd
);
4004 if (!iv_ca_has_deps (ivs
, cp
))
4007 if (!cheaper_cost_pair (cp
, new_cp
))
4015 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4020 cnd
= iv_cand (data
, ci
);
4022 cp
= get_use_iv_cost (data
, use
, cnd
);
4025 if (!iv_ca_has_deps (ivs
, cp
))
4028 if (!cheaper_cost_pair (cp
, new_cp
))
4037 iv_ca_delta_free (delta
);
4041 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4044 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4045 cost
= iv_ca_cost (ivs
);
4046 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4051 /* Tries to extend the sets IVS in the best possible way in order
4052 to express the USE. */
4055 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4058 unsigned best_cost
, act_cost
;
4061 struct iv_cand
*cand
;
4062 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4063 struct cost_pair
*cp
;
4065 iv_ca_add_use (data
, ivs
, use
);
4066 best_cost
= iv_ca_cost (ivs
);
4068 cp
= iv_ca_cand_for_use (ivs
, use
);
4071 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4072 iv_ca_set_no_cp (data
, ivs
, use
);
4075 /* First try important candidates. Only if it fails, try the specific ones.
4076 Rationale -- in loops with many variables the best choice often is to use
4077 just one generic biv. If we added here many ivs specific to the uses,
4078 the optimization algorithm later would be likely to get stuck in a local
4079 minimum, thus causing us to create too many ivs. The approach from
4080 few ivs to more seems more likely to be successful -- starting from few
4081 ivs, replacing an expensive use by a specific iv should always be a
4083 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4085 cand
= iv_cand (data
, i
);
4087 if (iv_ca_cand_used_p (ivs
, cand
))
4090 cp
= get_use_iv_cost (data
, use
, cand
);
4094 iv_ca_set_cp (data
, ivs
, use
, cp
);
4095 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
);
4096 iv_ca_set_no_cp (data
, ivs
, use
);
4097 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4099 if (act_cost
< best_cost
)
4101 best_cost
= act_cost
;
4103 iv_ca_delta_free (&best_delta
);
4104 best_delta
= act_delta
;
4107 iv_ca_delta_free (&act_delta
);
4110 if (best_cost
== INFTY
)
4112 for (i
= 0; i
< use
->n_map_members
; i
++)
4114 cp
= use
->cost_map
+ i
;
4119 /* Already tried this. */
4120 if (cand
->important
)
4123 if (iv_ca_cand_used_p (ivs
, cand
))
4127 iv_ca_set_cp (data
, ivs
, use
, cp
);
4128 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
);
4129 iv_ca_set_no_cp (data
, ivs
, use
);
4130 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4133 if (act_cost
< best_cost
)
4135 best_cost
= act_cost
;
4138 iv_ca_delta_free (&best_delta
);
4139 best_delta
= act_delta
;
4142 iv_ca_delta_free (&act_delta
);
4146 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4147 iv_ca_delta_free (&best_delta
);
4149 return (best_cost
!= INFTY
);
4152 /* Finds an initial assignment of candidates to uses. */
4154 static struct iv_ca
*
4155 get_initial_solution (struct ivopts_data
*data
)
4157 struct iv_ca
*ivs
= iv_ca_new (data
);
4160 for (i
= 0; i
< n_iv_uses (data
); i
++)
4161 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4170 /* Tries to improve set of induction variables IVS. */
4173 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4175 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
);
4176 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4177 struct iv_cand
*cand
;
4179 /* Try altering the set of induction variables by one. */
4180 for (i
= 0; i
< n_iv_cands (data
); i
++)
4182 cand
= iv_cand (data
, i
);
4184 if (iv_ca_cand_used_p (ivs
, cand
))
4185 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4187 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
);
4189 if (acost
< best_cost
)
4193 iv_ca_delta_free (&best_delta
);
4194 best_delta
= act_delta
;
4197 iv_ca_delta_free (&act_delta
);
4203 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4204 iv_ca_delta_free (&best_delta
);
4208 /* Attempts to find the optimal set of induction variables. We do simple
4209 greedy heuristic -- we try to replace at most one candidate in the selected
4210 solution and remove the unused ivs while this improves the cost. */
4212 static struct iv_ca
*
4213 find_optimal_iv_set (struct ivopts_data
*data
)
4219 /* Get the initial solution. */
4220 set
= get_initial_solution (data
);
4223 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4224 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
4228 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4230 fprintf (dump_file
, "Initial set of candidates:\n");
4231 iv_ca_dump (data
, dump_file
, set
);
4234 while (try_improve_iv_set (data
, set
))
4236 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4238 fprintf (dump_file
, "Improved to:\n");
4239 iv_ca_dump (data
, dump_file
, set
);
4243 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4244 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
4246 for (i
= 0; i
< n_iv_uses (data
); i
++)
4248 use
= iv_use (data
, i
);
4249 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
4255 /* Creates a new induction variable corresponding to CAND. */
4258 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
4260 block_stmt_iterator incr_pos
;
4270 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
4274 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
4279 /* Mark that the iv is preserved. */
4280 name_info (data
, cand
->var_before
)->preserve_biv
= true;
4281 name_info (data
, cand
->var_after
)->preserve_biv
= true;
4283 /* Rewrite the increment so that it uses var_before directly. */
4284 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
4289 gimple_add_tmp_var (cand
->var_before
);
4290 add_referenced_tmp_var (cand
->var_before
);
4292 base
= unshare_expr (cand
->iv
->base
);
4294 create_iv (base
, cand
->iv
->step
, cand
->var_before
, data
->current_loop
,
4295 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
4298 /* Creates new induction variables described in SET. */
4301 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
4304 struct iv_cand
*cand
;
4307 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
4309 cand
= iv_cand (data
, i
);
4310 create_new_iv (data
, cand
);
4314 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4315 is true, remove also the ssa name defined by the statement. */
4318 remove_statement (tree stmt
, bool including_defined_name
)
4320 if (TREE_CODE (stmt
) == PHI_NODE
)
4322 if (!including_defined_name
)
4324 /* Prevent the ssa name defined by the statement from being removed. */
4325 SET_PHI_RESULT (stmt
, NULL
);
4327 remove_phi_node (stmt
, NULL_TREE
, bb_for_stmt (stmt
));
4331 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
4337 /* Rewrites USE (definition of iv used in a nonlinear expression)
4338 using candidate CAND. */
4341 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
4342 struct iv_use
*use
, struct iv_cand
*cand
)
4344 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4346 tree op
, stmts
, tgt
, ass
;
4347 block_stmt_iterator bsi
, pbsi
;
4349 switch (TREE_CODE (use
->stmt
))
4352 tgt
= PHI_RESULT (use
->stmt
);
4354 /* If we should keep the biv, do not replace it. */
4355 if (name_info (data
, tgt
)->preserve_biv
)
4358 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
4359 while (!bsi_end_p (pbsi
)
4360 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
4368 tgt
= TREE_OPERAND (use
->stmt
, 0);
4369 bsi
= bsi_for_stmt (use
->stmt
);
4376 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
4378 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
4381 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4382 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
4383 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
4384 remove_statement (use
->stmt
, false);
4385 SSA_NAME_DEF_STMT (tgt
) = ass
;
4390 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4391 TREE_OPERAND (use
->stmt
, 1) = op
;
4395 /* Replaces ssa name in index IDX by its basic variable. Callback for
4399 idx_remove_ssa_names (tree base
, tree
*idx
,
4400 void *data ATTRIBUTE_UNUSED
)
4404 if (TREE_CODE (*idx
) == SSA_NAME
)
4405 *idx
= SSA_NAME_VAR (*idx
);
4407 if (TREE_CODE (base
) == ARRAY_REF
)
4409 op
= &TREE_OPERAND (base
, 2);
4411 && TREE_CODE (*op
) == SSA_NAME
)
4412 *op
= SSA_NAME_VAR (*op
);
4413 op
= &TREE_OPERAND (base
, 3);
4415 && TREE_CODE (*op
) == SSA_NAME
)
4416 *op
= SSA_NAME_VAR (*op
);
4422 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4425 unshare_and_remove_ssa_names (tree ref
)
4427 ref
= unshare_expr (ref
);
4428 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
4433 /* Rewrites base of memory access OP with expression WITH in statement
4434 pointed to by BSI. */
4437 rewrite_address_base (block_stmt_iterator
*bsi
, tree
*op
, tree with
)
4439 tree bvar
, var
, new_var
, new_name
, copy
, name
;
4442 var
= bvar
= get_base_address (*op
);
4444 if (!var
|| TREE_CODE (with
) != SSA_NAME
)
4447 gcc_assert (TREE_CODE (var
) != ALIGN_INDIRECT_REF
);
4448 gcc_assert (TREE_CODE (var
) != MISALIGNED_INDIRECT_REF
);
4449 if (TREE_CODE (var
) == INDIRECT_REF
)
4450 var
= TREE_OPERAND (var
, 0);
4451 if (TREE_CODE (var
) == SSA_NAME
)
4454 var
= SSA_NAME_VAR (var
);
4456 else if (DECL_P (var
))
4461 if (var_ann (var
)->type_mem_tag
)
4462 var
= var_ann (var
)->type_mem_tag
;
4464 /* We need to add a memory tag for the variable. But we do not want
4465 to add it to the temporary used for the computations, since this leads
4466 to problems in redundancy elimination when there are common parts
4467 in two computations referring to the different arrays. So we copy
4468 the variable to a new temporary. */
4469 copy
= build2 (MODIFY_EXPR
, void_type_node
, NULL_TREE
, with
);
4471 new_name
= duplicate_ssa_name (name
, copy
);
4474 new_var
= create_tmp_var (TREE_TYPE (with
), "ruatmp");
4475 add_referenced_tmp_var (new_var
);
4476 var_ann (new_var
)->type_mem_tag
= var
;
4477 new_name
= make_ssa_name (new_var
, copy
);
4479 TREE_OPERAND (copy
, 0) = new_name
;
4480 bsi_insert_before (bsi
, copy
, BSI_SAME_STMT
);
4486 gcc_assert (TREE_CODE (*op
) != ALIGN_INDIRECT_REF
);
4487 gcc_assert (TREE_CODE (*op
) != MISALIGNED_INDIRECT_REF
);
4489 if (TREE_CODE (*op
) == INDIRECT_REF
)
4490 orig
= REF_ORIGINAL (*op
);
4492 orig
= unshare_and_remove_ssa_names (*op
);
4494 *op
= build1 (INDIRECT_REF
, TREE_TYPE (*op
), with
);
4496 /* Record the original reference, for purposes of alias analysis. */
4497 REF_ORIGINAL (*op
) = orig
;
4500 /* Rewrites USE (address that is an iv) using candidate CAND. */
4503 rewrite_use_address (struct ivopts_data
*data
,
4504 struct iv_use
*use
, struct iv_cand
*cand
)
4506 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4508 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4510 tree op
= force_gimple_operand (comp
, &stmts
, true, NULL_TREE
);
4513 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4515 rewrite_address_base (&bsi
, use
->op_p
, op
);
4518 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4522 rewrite_use_compare (struct ivopts_data
*data
,
4523 struct iv_use
*use
, struct iv_cand
*cand
)
4526 tree
*op_p
, cond
, op
, stmts
, bound
;
4527 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4528 enum tree_code compare
;
4530 if (may_eliminate_iv (data
->current_loop
,
4531 use
, cand
, &compare
, &bound
))
4533 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
4537 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4539 *use
->op_p
= build2 (compare
, boolean_type_node
,
4540 var_at_stmt (data
->current_loop
,
4541 cand
, use
->stmt
), op
);
4542 modify_stmt (use
->stmt
);
4546 /* The induction variable elimination failed; just express the original
4548 comp
= unshare_expr (get_computation (data
->current_loop
, use
, cand
));
4551 op_p
= &TREE_OPERAND (cond
, 0);
4552 if (TREE_CODE (*op_p
) != SSA_NAME
4553 || zero_p (get_iv (data
, *op_p
)->step
))
4554 op_p
= &TREE_OPERAND (cond
, 1);
4556 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
4558 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4563 /* Ensure that operand *OP_P may be used at the end of EXIT without
4564 violating loop closed ssa form. */
4567 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
4570 struct loop
*def_loop
;
4573 use
= USE_FROM_PTR (op_p
);
4574 if (TREE_CODE (use
) != SSA_NAME
)
4577 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
4581 def_loop
= def_bb
->loop_father
;
4582 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
4585 /* Try finding a phi node that copies the value out of the loop. */
4586 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4587 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
4592 /* Create such a phi node. */
4593 tree new_name
= duplicate_ssa_name (use
, NULL
);
4595 phi
= create_phi_node (new_name
, exit
->dest
);
4596 SSA_NAME_DEF_STMT (new_name
) = phi
;
4597 add_phi_arg (&phi
, use
, exit
);
4600 SET_USE (op_p
, PHI_RESULT (phi
));
4603 /* Ensure that operands of STMT may be used at the end of EXIT without
4604 violating loop closed ssa form. */
4607 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
4611 v_may_def_optype v_may_defs
;
4614 get_stmt_operands (stmt
);
4616 uses
= STMT_USE_OPS (stmt
);
4617 for (i
= 0; i
< NUM_USES (uses
); i
++)
4618 protect_loop_closed_ssa_form_use (exit
, USE_OP_PTR (uses
, i
));
4620 vuses
= STMT_VUSE_OPS (stmt
);
4621 for (i
= 0; i
< NUM_VUSES (vuses
); i
++)
4622 protect_loop_closed_ssa_form_use (exit
, VUSE_OP_PTR (vuses
, i
));
4624 v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
4625 for (i
= 0; i
< NUM_V_MAY_DEFS (v_may_defs
); i
++)
4626 protect_loop_closed_ssa_form_use (exit
, V_MAY_DEF_OP_PTR (v_may_defs
, i
));
4629 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4630 so that they are emitted on the correct place, and so that the loop closed
4631 ssa form is preserved. */
4634 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
4636 tree_stmt_iterator tsi
;
4637 block_stmt_iterator bsi
;
4638 tree phi
, stmt
, def
, next
;
4640 if (EDGE_COUNT (exit
->dest
->preds
) > 1)
4641 split_loop_exit_edge (exit
);
4643 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
4645 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
4646 protect_loop_closed_ssa_form (exit
, tsi_stmt (tsi
));
4649 protect_loop_closed_ssa_form (exit
, stmts
);
4651 /* Ensure there is label in exit->dest, so that we can
4653 tree_block_label (exit
->dest
);
4654 bsi
= bsi_after_labels (exit
->dest
);
4655 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4660 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
4662 next
= TREE_CHAIN (phi
);
4664 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
4666 def
= PHI_RESULT (phi
);
4667 remove_statement (phi
, false);
4668 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
4670 SSA_NAME_DEF_STMT (def
) = stmt
;
4671 bsi_insert_after (&bsi
, stmt
, BSI_CONTINUE_LINKING
);
4676 /* Rewrites the final value of USE (that is only needed outside of the loop)
4677 using candidate CAND. */
4680 rewrite_use_outer (struct ivopts_data
*data
,
4681 struct iv_use
*use
, struct iv_cand
*cand
)
4684 tree value
, op
, stmts
, tgt
;
4687 switch (TREE_CODE (use
->stmt
))
4690 tgt
= PHI_RESULT (use
->stmt
);
4693 tgt
= TREE_OPERAND (use
->stmt
, 0);
4699 exit
= single_dom_exit (data
->current_loop
);
4705 bool ok
= may_replace_final_value (data
->current_loop
, use
, &value
);
4709 value
= get_computation_at (data
->current_loop
,
4710 use
, cand
, last_stmt (exit
->src
));
4712 value
= unshare_expr (value
);
4713 op
= force_gimple_operand (value
, &stmts
, true, SSA_NAME_VAR (tgt
));
4715 /* If we will preserve the iv anyway and we would need to perform
4716 some computation to replace the final value, do nothing. */
4717 if (stmts
&& name_info (data
, tgt
)->preserve_biv
)
4720 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4722 use_operand_p use_p
= PHI_ARG_DEF_PTR_FROM_EDGE (phi
, exit
);
4724 if (USE_FROM_PTR (use_p
) == tgt
)
4725 SET_USE (use_p
, op
);
4729 compute_phi_arg_on_exit (exit
, stmts
, op
);
4731 /* Enable removal of the statement. We cannot remove it directly,
4732 since we may still need the aliasing information attached to the
4733 ssa name defined by it. */
4734 name_info (data
, tgt
)->iv
->have_use_for
= false;
4738 /* If the variable is going to be preserved anyway, there is nothing to
4740 if (name_info (data
, tgt
)->preserve_biv
)
4743 /* Otherwise we just need to compute the iv. */
4744 rewrite_use_nonlinear_expr (data
, use
, cand
);
4747 /* Rewrites USE using candidate CAND. */
4750 rewrite_use (struct ivopts_data
*data
,
4751 struct iv_use
*use
, struct iv_cand
*cand
)
4755 case USE_NONLINEAR_EXPR
:
4756 rewrite_use_nonlinear_expr (data
, use
, cand
);
4760 rewrite_use_outer (data
, use
, cand
);
4764 rewrite_use_address (data
, use
, cand
);
4768 rewrite_use_compare (data
, use
, cand
);
4774 modify_stmt (use
->stmt
);
4777 /* Rewrite the uses using the selected induction variables. */
4780 rewrite_uses (struct ivopts_data
*data
)
4783 struct iv_cand
*cand
;
4786 for (i
= 0; i
< n_iv_uses (data
); i
++)
4788 use
= iv_use (data
, i
);
4789 cand
= use
->selected
;
4792 rewrite_use (data
, use
, cand
);
4796 /* Removes the ivs that are not used after rewriting. */
4799 remove_unused_ivs (struct ivopts_data
*data
)
4804 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4806 struct version_info
*info
;
4808 info
= ver_info (data
, j
);
4810 && !zero_p (info
->iv
->step
)
4812 && !info
->iv
->have_use_for
4813 && !info
->preserve_biv
)
4814 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
4818 /* Frees data allocated by the optimization of a single loop. */
4821 free_loop_data (struct ivopts_data
*data
)
4826 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
4828 struct version_info
*info
;
4830 info
= ver_info (data
, i
);
4834 info
->has_nonlin_use
= false;
4835 info
->preserve_biv
= false;
4838 bitmap_clear (data
->relevant
);
4839 bitmap_clear (data
->important_candidates
);
4841 for (i
= 0; i
< n_iv_uses (data
); i
++)
4843 struct iv_use
*use
= iv_use (data
, i
);
4846 BITMAP_XFREE (use
->related_cands
);
4847 for (j
= 0; j
< use
->n_map_members
; j
++)
4848 if (use
->cost_map
[j
].depends_on
)
4849 BITMAP_XFREE (use
->cost_map
[j
].depends_on
);
4850 free (use
->cost_map
);
4853 VARRAY_POP_ALL (data
->iv_uses
);
4855 for (i
= 0; i
< n_iv_cands (data
); i
++)
4857 struct iv_cand
*cand
= iv_cand (data
, i
);
4863 VARRAY_POP_ALL (data
->iv_candidates
);
4865 if (data
->version_info_size
< num_ssa_names
)
4867 data
->version_info_size
= 2 * num_ssa_names
;
4868 free (data
->version_info
);
4869 data
->version_info
= xcalloc (data
->version_info_size
,
4870 sizeof (struct version_info
));
4873 data
->max_inv_id
= 0;
4875 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (decl_rtl_to_reset
); i
++)
4877 tree obj
= VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset
, i
);
4879 SET_DECL_RTL (obj
, NULL_RTX
);
4881 VARRAY_POP_ALL (decl_rtl_to_reset
);
4884 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4888 tree_ssa_iv_optimize_finalize (struct loops
*loops
, struct ivopts_data
*data
)
4892 for (i
= 1; i
< loops
->num
; i
++)
4893 if (loops
->parray
[i
])
4895 free (loops
->parray
[i
]->aux
);
4896 loops
->parray
[i
]->aux
= NULL
;
4899 free_loop_data (data
);
4900 free (data
->version_info
);
4901 BITMAP_XFREE (data
->relevant
);
4902 BITMAP_XFREE (data
->important_candidates
);
4904 VARRAY_FREE (decl_rtl_to_reset
);
4905 VARRAY_FREE (data
->iv_uses
);
4906 VARRAY_FREE (data
->iv_candidates
);
4909 /* Optimizes the LOOP. Returns true if anything changed. */
4912 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
4914 bool changed
= false;
4915 struct iv_ca
*iv_ca
;
4918 data
->current_loop
= loop
;
4920 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4922 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
4924 exit
= single_dom_exit (loop
);
4927 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
4928 exit
->src
->index
, exit
->dest
->index
);
4929 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
4930 fprintf (dump_file
, "\n");
4933 fprintf (dump_file
, "\n");
4936 /* For each ssa name determines whether it behaves as an induction variable
4938 if (!find_induction_variables (data
))
4941 /* Finds interesting uses (item 1). */
4942 find_interesting_uses (data
);
4943 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
4946 /* Finds candidates for the induction variables (item 2). */
4947 find_iv_candidates (data
);
4949 /* Calculates the costs (item 3, part 1). */
4950 determine_use_iv_costs (data
);
4951 determine_iv_costs (data
);
4952 determine_set_costs (data
);
4954 /* Find the optimal set of induction variables (item 3, part 2). */
4955 iv_ca
= find_optimal_iv_set (data
);
4960 /* Create the new induction variables (item 4, part 1). */
4961 create_new_ivs (data
, iv_ca
);
4962 iv_ca_free (&iv_ca
);
4964 /* Rewrite the uses (item 4, part 2). */
4965 rewrite_uses (data
);
4967 /* Remove the ivs that are unused after rewriting. */
4968 remove_unused_ivs (data
);
4970 loop_commit_inserts ();
4972 /* We have changed the structure of induction variables; it might happen
4973 that definitions in the scev database refer to some of them that were
4978 free_loop_data (data
);
4983 /* Main entry point. Optimizes induction variables in LOOPS. */
4986 tree_ssa_iv_optimize (struct loops
*loops
)
4989 struct ivopts_data data
;
4991 tree_ssa_iv_optimize_init (loops
, &data
);
4993 /* Optimize the loops starting with the innermost ones. */
4994 loop
= loops
->tree_root
;
4998 #ifdef ENABLE_CHECKING
4999 verify_loop_closed_ssa ();
5003 /* Scan the loops, inner ones first. */
5004 while (loop
!= loops
->tree_root
)
5006 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5007 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5009 tree_ssa_iv_optimize_loop (&data
, loop
);
5021 #ifdef ENABLE_CHECKING
5022 verify_loop_closed_ssa ();
5026 tree_ssa_iv_optimize_finalize (loops
, &data
);