1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
43 # i_23 = PHI <0(0), i_18(10)>;
46 if (j_15 > 41) goto <L1>; else goto <L17>;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
67 # i_23 = PHI <0(0), i_18(10)>;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
85 #include "coretypes.h"
91 #include "tree-pass.h"
95 #include "optabs-query.h"
96 #include "gimple-pretty-print.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
115 #include "tree-hash-traits.h"
117 #include "builtins.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
127 /* For lang_hooks.types.type_for_mode. */
128 #include "langhooks.h"
/* Only handle PHIs with no more arguments unless we are asked to by
   simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any ifcvting to be done.  */
static bool need_to_ifcvt;
158 /* Hash for struct innermost_loop_behavior. It depends on the user to
161 struct innermost_loop_behavior_hash
: nofree_ptr_hash
<innermost_loop_behavior
>
163 static inline hashval_t
hash (const value_type
&);
164 static inline bool equal (const value_type
&,
165 const compare_type
&);
169 innermost_loop_behavior_hash::hash (const value_type
&e
)
173 hash
= iterative_hash_expr (e
->base_address
, 0);
174 hash
= iterative_hash_expr (e
->offset
, hash
);
175 hash
= iterative_hash_expr (e
->init
, hash
);
176 return iterative_hash_expr (e
->step
, hash
);
180 innermost_loop_behavior_hash::equal (const value_type
&e1
,
181 const compare_type
&e2
)
183 if ((e1
->base_address
&& !e2
->base_address
)
184 || (!e1
->base_address
&& e2
->base_address
)
185 || (!e1
->offset
&& e2
->offset
)
186 || (e1
->offset
&& !e2
->offset
)
187 || (!e1
->init
&& e2
->init
)
188 || (e1
->init
&& !e2
->init
)
189 || (!e1
->step
&& e2
->step
)
190 || (e1
->step
&& !e2
->step
))
193 if (e1
->base_address
&& e2
->base_address
194 && !operand_equal_p (e1
->base_address
, e2
->base_address
, 0))
196 if (e1
->offset
&& e2
->offset
197 && !operand_equal_p (e1
->offset
, e2
->offset
, 0))
199 if (e1
->init
&& e2
->init
200 && !operand_equal_p (e1
->init
, e2
->init
, 0))
202 if (e1
->step
&& e2
->step
203 && !operand_equal_p (e1
->step
, e2
->step
, 0))
209 /* List of basic blocks in if-conversion-suitable order. */
210 static basic_block
*ifc_bbs
;
212 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
213 static hash_map
<innermost_loop_behavior_hash
,
214 data_reference_p
> *innermost_DR_map
;
216 /* Hash table to store <base reference, DR> pairs. */
217 static hash_map
<tree_operand_hash
, data_reference_p
> *baseref_DR_map
;
219 /* List of redundant SSA names: the first should be replaced by the second. */
220 static vec
< std::pair
<tree
, tree
> > redundant_ssa_names
;
222 /* Structure used to predicate basic blocks. This is attached to the
223 ->aux field of the BBs in the loop to be if-converted. */
224 struct bb_predicate
{
226 /* The condition under which this basic block is executed. */
229 /* PREDICATE is gimplified, and the sequence of statements is
230 recorded here, in order to avoid the duplication of computations
231 that occur in previous conditions. See PR44483. */
232 gimple_seq predicate_gimplified_stmts
;
235 /* Returns true when the basic block BB has a predicate. */
238 bb_has_predicate (basic_block bb
)
240 return bb
->aux
!= NULL
;
243 /* Returns the gimplified predicate for basic block BB. */
246 bb_predicate (basic_block bb
)
248 return ((struct bb_predicate
*) bb
->aux
)->predicate
;
251 /* Sets the gimplified predicate COND for basic block BB. */
254 set_bb_predicate (basic_block bb
, tree cond
)
256 gcc_assert ((TREE_CODE (cond
) == TRUTH_NOT_EXPR
257 && is_gimple_val (TREE_OPERAND (cond
, 0)))
258 || is_gimple_val (cond
));
259 ((struct bb_predicate
*) bb
->aux
)->predicate
= cond
;
262 /* Returns the sequence of statements of the gimplification of the
263 predicate for basic block BB. */
265 static inline gimple_seq
266 bb_predicate_gimplified_stmts (basic_block bb
)
268 return ((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
;
271 /* Sets the sequence of statements STMTS of the gimplification of the
272 predicate for basic block BB. */
275 set_bb_predicate_gimplified_stmts (basic_block bb
, gimple_seq stmts
)
277 ((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
= stmts
;
280 /* Adds the sequence of statements STMTS to the sequence of statements
281 of the predicate for basic block BB. */
284 add_bb_predicate_gimplified_stmts (basic_block bb
, gimple_seq stmts
)
286 /* We might have updated some stmts in STMTS via force_gimple_operand
287 calling fold_stmt and that producing multiple stmts. Delink immediate
288 uses so update_ssa after loop versioning doesn't get confused for
289 the not yet inserted predicates.
290 ??? This should go away once we reliably avoid updating stmts
292 for (gimple_stmt_iterator gsi
= gsi_start (stmts
);
293 !gsi_end_p (gsi
); gsi_next (&gsi
))
295 gimple
*stmt
= gsi_stmt (gsi
);
296 delink_stmt_imm_use (stmt
);
297 gimple_set_modified (stmt
, true);
299 gimple_seq_add_seq_without_update
300 (&(((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
), stmts
);
303 /* Initializes to TRUE the predicate of basic block BB. */
306 init_bb_predicate (basic_block bb
)
308 bb
->aux
= XNEW (struct bb_predicate
);
309 set_bb_predicate_gimplified_stmts (bb
, NULL
);
310 set_bb_predicate (bb
, boolean_true_node
);
313 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
316 release_bb_predicate (basic_block bb
)
318 gimple_seq stmts
= bb_predicate_gimplified_stmts (bb
);
321 /* Ensure that these stmts haven't yet been added to a bb. */
323 for (gimple_stmt_iterator i
= gsi_start (stmts
);
324 !gsi_end_p (i
); gsi_next (&i
))
325 gcc_assert (! gimple_bb (gsi_stmt (i
)));
328 gimple_seq_discard (stmts
);
329 set_bb_predicate_gimplified_stmts (bb
, NULL
);
333 /* Free the predicate of basic block BB. */
336 free_bb_predicate (basic_block bb
)
338 if (!bb_has_predicate (bb
))
341 release_bb_predicate (bb
);
346 /* Reinitialize predicate of BB with the true predicate. */
349 reset_bb_predicate (basic_block bb
)
351 if (!bb_has_predicate (bb
))
352 init_bb_predicate (bb
);
355 release_bb_predicate (bb
);
356 set_bb_predicate (bb
, boolean_true_node
);
360 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
361 the expression EXPR. Inserts the statement created for this
362 computation before GSI and leaves the iterator GSI at the same
366 ifc_temp_var (tree type
, tree expr
, gimple_stmt_iterator
*gsi
)
368 tree new_name
= make_temp_ssa_name (type
, NULL
, "_ifc_");
369 gimple
*stmt
= gimple_build_assign (new_name
, expr
);
370 gimple_set_vuse (stmt
, gimple_vuse (gsi_stmt (*gsi
)));
371 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
375 /* Return true when COND is a false predicate. */
378 is_false_predicate (tree cond
)
380 return (cond
!= NULL_TREE
381 && (cond
== boolean_false_node
382 || integer_zerop (cond
)));
385 /* Return true when COND is a true predicate. */
388 is_true_predicate (tree cond
)
390 return (cond
== NULL_TREE
391 || cond
== boolean_true_node
392 || integer_onep (cond
));
395 /* Returns true when BB has a predicate that is not trivial: true or
399 is_predicated (basic_block bb
)
401 return !is_true_predicate (bb_predicate (bb
));
404 /* Parses the predicate COND and returns its comparison code and
405 operands OP0 and OP1. */
407 static enum tree_code
408 parse_predicate (tree cond
, tree
*op0
, tree
*op1
)
412 if (TREE_CODE (cond
) == SSA_NAME
413 && is_gimple_assign (s
= SSA_NAME_DEF_STMT (cond
)))
415 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s
)) == tcc_comparison
)
417 *op0
= gimple_assign_rhs1 (s
);
418 *op1
= gimple_assign_rhs2 (s
);
419 return gimple_assign_rhs_code (s
);
422 else if (gimple_assign_rhs_code (s
) == TRUTH_NOT_EXPR
)
424 tree op
= gimple_assign_rhs1 (s
);
425 tree type
= TREE_TYPE (op
);
426 enum tree_code code
= parse_predicate (op
, op0
, op1
);
428 return code
== ERROR_MARK
? ERROR_MARK
429 : invert_tree_comparison (code
, HONOR_NANS (type
));
435 if (COMPARISON_CLASS_P (cond
))
437 *op0
= TREE_OPERAND (cond
, 0);
438 *op1
= TREE_OPERAND (cond
, 1);
439 return TREE_CODE (cond
);
445 /* Returns the fold of predicate C1 OR C2 at location LOC. */
448 fold_or_predicates (location_t loc
, tree c1
, tree c2
)
450 tree op1a
, op1b
, op2a
, op2b
;
451 enum tree_code code1
= parse_predicate (c1
, &op1a
, &op1b
);
452 enum tree_code code2
= parse_predicate (c2
, &op2a
, &op2b
);
454 if (code1
!= ERROR_MARK
&& code2
!= ERROR_MARK
)
456 tree t
= maybe_fold_or_comparisons (boolean_type_node
, code1
, op1a
, op1b
,
462 return fold_build2_loc (loc
, TRUTH_OR_EXPR
, boolean_type_node
, c1
, c2
);
465 /* Returns either a COND_EXPR or the folded expression if the folded
466 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
467 a constant or a SSA_NAME. */
470 fold_build_cond_expr (tree type
, tree cond
, tree rhs
, tree lhs
)
472 /* If COND is comparison r != 0 and r has boolean type, convert COND
473 to SSA_NAME to accept by vect bool pattern. */
474 if (TREE_CODE (cond
) == NE_EXPR
)
476 tree op0
= TREE_OPERAND (cond
, 0);
477 tree op1
= TREE_OPERAND (cond
, 1);
478 if (TREE_CODE (op0
) == SSA_NAME
479 && TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
480 && (integer_zerop (op1
)))
484 gimple_match_op
cexpr (gimple_match_cond::UNCOND
, COND_EXPR
,
485 type
, cond
, rhs
, lhs
);
486 if (cexpr
.resimplify (NULL
, follow_all_ssa_edges
))
488 if (gimple_simplified_result_is_gimple_val (&cexpr
))
490 else if (cexpr
.code
== ABS_EXPR
)
491 return build1 (ABS_EXPR
, type
, cexpr
.ops
[0]);
492 else if (cexpr
.code
== MIN_EXPR
493 || cexpr
.code
== MAX_EXPR
)
494 return build2 ((tree_code
)cexpr
.code
, type
, cexpr
.ops
[0], cexpr
.ops
[1]);
497 return build3 (COND_EXPR
, type
, cond
, rhs
, lhs
);
500 /* Add condition NC to the predicate list of basic block BB. LOOP is
501 the loop to be if-converted. Use predicate of cd-equivalent block
502 for join bb if it exists: we call basic blocks bb1 and bb2
503 cd-equivalent if they are executed under the same condition. */
506 add_to_predicate_list (class loop
*loop
, basic_block bb
, tree nc
)
511 if (is_true_predicate (nc
))
514 /* If dominance tells us this basic block is always executed,
515 don't record any predicates for it. */
516 if (dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
519 dom_bb
= get_immediate_dominator (CDI_DOMINATORS
, bb
);
520 /* We use notion of cd equivalence to get simpler predicate for
521 join block, e.g. if join block has 2 predecessors with predicates
522 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
523 p1 & p2 | p1 & !p2. */
524 if (dom_bb
!= loop
->header
525 && get_immediate_dominator (CDI_POST_DOMINATORS
, dom_bb
) == bb
)
527 gcc_assert (flow_bb_inside_loop_p (loop
, dom_bb
));
528 bc
= bb_predicate (dom_bb
);
529 if (!is_true_predicate (bc
))
530 set_bb_predicate (bb
, bc
);
532 gcc_assert (is_true_predicate (bb_predicate (bb
)));
533 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
534 fprintf (dump_file
, "Use predicate of bb#%d for bb#%d\n",
535 dom_bb
->index
, bb
->index
);
539 if (!is_predicated (bb
))
543 bc
= bb_predicate (bb
);
544 bc
= fold_or_predicates (EXPR_LOCATION (bc
), nc
, bc
);
545 if (is_true_predicate (bc
))
547 reset_bb_predicate (bb
);
552 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
553 if (TREE_CODE (bc
) == TRUTH_NOT_EXPR
)
554 tp
= &TREE_OPERAND (bc
, 0);
557 if (!is_gimple_val (*tp
))
560 *tp
= force_gimple_operand (*tp
, &stmts
, true, NULL_TREE
);
561 add_bb_predicate_gimplified_stmts (bb
, stmts
);
563 set_bb_predicate (bb
, bc
);
566 /* Add the condition COND to the previous condition PREV_COND, and add
567 this to the predicate list of the destination of edge E. LOOP is
568 the loop to be if-converted. */
571 add_to_dst_predicate_list (class loop
*loop
, edge e
,
572 tree prev_cond
, tree cond
)
574 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
577 if (!is_true_predicate (prev_cond
))
578 cond
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
581 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, e
->dest
))
582 add_to_predicate_list (loop
, e
->dest
, cond
);
585 /* Return true if one of the successor edges of BB exits LOOP. */
588 bb_with_exit_edge_p (class loop
*loop
, basic_block bb
)
593 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
594 if (loop_exit_edge_p (loop
, e
))
600 /* Given PHI which has more than two arguments, this function checks if
601 it's if-convertible by degenerating its arguments. Specifically, if
602 below two conditions are satisfied:
604 1) Number of PHI arguments with different values equals to 2 and one
605 argument has the only occurrence.
606 2) The edge corresponding to the unique argument isn't critical edge.
608 Such PHI can be handled as PHIs have only two arguments. For example,
611 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
613 can be transformed into:
615 res = (predicate of e3) ? A_2 : A_1;
617 Return TRUE if it is the case, FALSE otherwise. */
620 phi_convertible_by_degenerating_args (gphi
*phi
)
623 tree arg
, t1
= NULL
, t2
= NULL
;
624 unsigned int i
, i1
= 0, i2
= 0, n1
= 0, n2
= 0;
625 unsigned int num_args
= gimple_phi_num_args (phi
);
627 gcc_assert (num_args
> 2);
629 for (i
= 0; i
< num_args
; i
++)
631 arg
= gimple_phi_arg_def (phi
, i
);
632 if (t1
== NULL
|| operand_equal_p (t1
, arg
, 0))
638 else if (t2
== NULL
|| operand_equal_p (t2
, arg
, 0))
648 if (n1
!= 1 && n2
!= 1)
651 /* Check if the edge corresponding to the unique arg is critical. */
652 e
= gimple_phi_arg_edge (phi
, (n1
== 1) ? i1
: i2
);
653 if (EDGE_COUNT (e
->src
->succs
) > 1)
659 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
660 and it belongs to basic block BB. Note at this point, it is sure
661 that PHI is if-convertible. This function updates global variable
662 ANY_COMPLICATED_PHI if PHI is complicated. */
665 if_convertible_phi_p (class loop
*loop
, basic_block bb
, gphi
*phi
)
667 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
669 fprintf (dump_file
, "-------------------------\n");
670 print_gimple_stmt (dump_file
, phi
, 0, TDF_SLIM
);
673 if (bb
!= loop
->header
674 && gimple_phi_num_args (phi
) > 2
675 && !phi_convertible_by_degenerating_args (phi
))
676 any_complicated_phi
= true;
681 /* Records the status of a data reference. This struct is attached to
682 each DR->aux field. */
685 bool rw_unconditionally
;
686 bool w_unconditionally
;
687 bool written_at_least_once
;
691 tree base_w_predicate
;
694 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
695 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
696 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
697 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
699 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
700 HASH tables. While storing them in HASH table, it checks if the
701 reference is unconditionally read or written and stores that as a flag
702 information. For base reference it checks if it is written atlest once
703 unconditionally and stores it as flag information along with DR.
704 In other words for every data reference A in STMT there exist other
705 accesses to a data reference with the same base with predicates that
706 add up (OR-up) to the true predicate: this ensures that the data
707 reference A is touched (read or written) on every iteration of the
708 if-converted loop. */
710 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a
)
713 data_reference_p
*master_dr
, *base_master_dr
;
714 tree base_ref
= DR_BASE_OBJECT (a
);
715 innermost_loop_behavior
*innermost
= &DR_INNERMOST (a
);
716 tree ca
= bb_predicate (gimple_bb (DR_STMT (a
)));
719 master_dr
= &innermost_DR_map
->get_or_insert (innermost
, &exist1
);
725 IFC_DR (*master_dr
)->w_predicate
726 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
727 IFC_DR (*master_dr
)->w_predicate
);
728 if (is_true_predicate (IFC_DR (*master_dr
)->w_predicate
))
729 DR_W_UNCONDITIONALLY (*master_dr
) = true;
731 IFC_DR (*master_dr
)->rw_predicate
732 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
733 IFC_DR (*master_dr
)->rw_predicate
);
734 if (is_true_predicate (IFC_DR (*master_dr
)->rw_predicate
))
735 DR_RW_UNCONDITIONALLY (*master_dr
) = true;
739 base_master_dr
= &baseref_DR_map
->get_or_insert (base_ref
, &exist2
);
742 IFC_DR (*base_master_dr
)->base_w_predicate
743 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
744 IFC_DR (*base_master_dr
)->base_w_predicate
);
745 if (is_true_predicate (IFC_DR (*base_master_dr
)->base_w_predicate
))
746 DR_BASE_W_UNCONDITIONALLY (*base_master_dr
) = true;
750 /* Return TRUE if can prove the index IDX of an array reference REF is
751 within array bound. Return false otherwise. */
754 idx_within_array_bound (tree ref
, tree
*idx
, void *dta
)
756 wi::overflow_type overflow
;
757 widest_int niter
, valid_niter
, delta
, wi_step
;
760 class loop
*loop
= (class loop
*) dta
;
762 /* Only support within-bound access for array references. */
763 if (TREE_CODE (ref
) != ARRAY_REF
)
766 /* For arrays that might have flexible sizes, it is not guaranteed that they
767 do not extend over their declared size. */
768 if (array_ref_flexible_size_p (ref
))
771 ev
= analyze_scalar_evolution (loop
, *idx
);
772 ev
= instantiate_parameters (loop
, ev
);
773 init
= initial_condition (ev
);
774 step
= evolution_part_in_loop_num (ev
, loop
->num
);
776 if (!init
|| TREE_CODE (init
) != INTEGER_CST
777 || (step
&& TREE_CODE (step
) != INTEGER_CST
))
780 low
= array_ref_low_bound (ref
);
781 high
= array_ref_up_bound (ref
);
783 /* The case of nonconstant bounds could be handled, but it would be
785 if (TREE_CODE (low
) != INTEGER_CST
786 || !high
|| TREE_CODE (high
) != INTEGER_CST
)
789 /* Check if the intial idx is within bound. */
790 if (wi::to_widest (init
) < wi::to_widest (low
)
791 || wi::to_widest (init
) > wi::to_widest (high
))
794 /* The idx is always within bound. */
795 if (!step
|| integer_zerop (step
))
798 if (!max_loop_iterations (loop
, &niter
))
801 if (wi::to_widest (step
) < 0)
803 delta
= wi::to_widest (init
) - wi::to_widest (low
);
804 wi_step
= -wi::to_widest (step
);
808 delta
= wi::to_widest (high
) - wi::to_widest (init
);
809 wi_step
= wi::to_widest (step
);
812 valid_niter
= wi::div_floor (delta
, wi_step
, SIGNED
, &overflow
);
813 /* The iteration space of idx is within array bound. */
814 if (!overflow
&& niter
<= valid_niter
)
820 /* Return TRUE if ref is a within bound array reference. */
823 ref_within_array_bound (gimple
*stmt
, tree ref
)
825 class loop
*loop
= loop_containing_stmt (stmt
);
827 gcc_assert (loop
!= NULL
);
828 return for_each_index (&ref
, idx_within_array_bound
, loop
);
832 /* Given a memory reference expression T, return TRUE if base object
833 it refers to is writable. The base object of a memory reference
834 is the main object being referenced, which is returned by function
838 base_object_writable (tree ref
)
840 tree base_tree
= get_base_address (ref
);
843 && DECL_P (base_tree
)
844 && decl_binds_to_current_def_p (base_tree
)
845 && !TREE_READONLY (base_tree
));
848 /* Return true when the memory references of STMT won't trap in the
849 if-converted code. There are two things that we have to check for:
851 - writes to memory occur to writable memory: if-conversion of
852 memory writes transforms the conditional memory writes into
853 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
854 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
855 be executed at all in the original code, it may be a readonly
856 memory. To check that A is not const-qualified, we check that
857 there exists at least an unconditional write to A in the current
860 - reads or writes to memory are valid memory accesses for every
861 iteration. To check that the memory accesses are correctly formed
862 and that we are allowed to read and write in these locations, we
863 check that the memory accesses to be if-converted occur at every
864 iteration unconditionally.
866 Returns true for the memory reference in STMT, same memory reference
867 is read or written unconditionally atleast once and the base memory
868 reference is written unconditionally once. This is to check reference
869 will not write fault. Also retuns true if the memory reference is
870 unconditionally read once then we are conditionally writing to memory
871 which is defined as read and write and is bound to the definition
874 ifcvt_memrefs_wont_trap (gimple
*stmt
, vec
<data_reference_p
> drs
)
876 /* If DR didn't see a reference here we can't use it to tell
877 whether the ref traps or not. */
878 if (gimple_uid (stmt
) == 0)
881 data_reference_p
*master_dr
, *base_master_dr
;
882 data_reference_p a
= drs
[gimple_uid (stmt
) - 1];
884 tree base
= DR_BASE_OBJECT (a
);
885 innermost_loop_behavior
*innermost
= &DR_INNERMOST (a
);
887 gcc_assert (DR_STMT (a
) == stmt
);
888 gcc_assert (DR_BASE_ADDRESS (a
) || DR_OFFSET (a
)
889 || DR_INIT (a
) || DR_STEP (a
));
891 master_dr
= innermost_DR_map
->get (innermost
);
892 gcc_assert (master_dr
!= NULL
);
894 base_master_dr
= baseref_DR_map
->get (base
);
896 /* If a is unconditionally written to it doesn't trap. */
897 if (DR_W_UNCONDITIONALLY (*master_dr
))
900 /* If a is unconditionally accessed then ...
902 Even a is conditional access, we can treat it as an unconditional
903 one if it's an array reference and all its index are within array
905 if (DR_RW_UNCONDITIONALLY (*master_dr
)
906 || ref_within_array_bound (stmt
, DR_REF (a
)))
908 /* an unconditional read won't trap. */
912 /* an unconditionaly write won't trap if the base is written
913 to unconditionally. */
915 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr
))
916 return flag_store_data_races
;
917 /* or the base is known to be not readonly. */
918 else if (base_object_writable (DR_REF (a
)))
919 return flag_store_data_races
;
925 /* Return true if STMT could be converted into a masked load or store
926 (conditional load or store based on a mask computed from bb predicate). */
929 ifcvt_can_use_mask_load_store (gimple
*stmt
)
931 /* Check whether this is a load or store. */
932 tree lhs
= gimple_assign_lhs (stmt
);
935 if (gimple_store_p (stmt
))
937 if (!is_gimple_val (gimple_assign_rhs1 (stmt
)))
942 else if (gimple_assign_load_p (stmt
))
945 ref
= gimple_assign_rhs1 (stmt
);
950 if (may_be_nonaddressable_p (ref
))
953 /* Mask should be integer mode of the same size as the load/store
955 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
956 if (!int_mode_for_mode (mode
).exists () || VECTOR_MODE_P (mode
))
959 if (can_vec_mask_load_store_p (mode
, VOIDmode
, is_load
))
965 /* Return true if STMT could be converted from an operation that is
966 unconditional to one that is conditional on a bb predicate mask. */
969 ifcvt_can_predicate (gimple
*stmt
)
971 basic_block bb
= gimple_bb (stmt
);
973 if (!(flag_tree_loop_vectorize
|| bb
->loop_father
->force_vectorize
)
974 || bb
->loop_father
->dont_vectorize
975 || gimple_has_volatile_ops (stmt
))
978 if (gimple_assign_single_p (stmt
))
979 return ifcvt_can_use_mask_load_store (stmt
);
981 tree_code code
= gimple_assign_rhs_code (stmt
);
982 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
983 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
984 if (!types_compatible_p (lhs_type
, rhs_type
))
986 internal_fn cond_fn
= get_conditional_internal_fn (code
);
987 return (cond_fn
!= IFN_LAST
988 && vectorized_internal_fn_supported_p (cond_fn
, lhs_type
));
991 /* Return true when STMT is if-convertible.
993 GIMPLE_ASSIGN statement is not if-convertible if,
996 - LHS is not var decl. */
999 if_convertible_gimple_assign_stmt_p (gimple
*stmt
,
1000 vec
<data_reference_p
> refs
)
1002 tree lhs
= gimple_assign_lhs (stmt
);
1004 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1006 fprintf (dump_file
, "-------------------------\n");
1007 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
1010 if (!is_gimple_reg_type (TREE_TYPE (lhs
)))
1013 /* Some of these constrains might be too conservative. */
1014 if (stmt_ends_bb_p (stmt
)
1015 || gimple_has_volatile_ops (stmt
)
1016 || (TREE_CODE (lhs
) == SSA_NAME
1017 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
1018 || gimple_has_side_effects (stmt
))
1020 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1021 fprintf (dump_file
, "stmt not suitable for ifcvt\n");
1025 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1026 in between if_convertible_loop_p and combine_blocks
1027 we can perform loop versioning. */
1028 gimple_set_plf (stmt
, GF_PLF_2
, false);
1030 if ((! gimple_vuse (stmt
)
1031 || gimple_could_trap_p_1 (stmt
, false, false)
1032 || ! ifcvt_memrefs_wont_trap (stmt
, refs
))
1033 && gimple_could_trap_p (stmt
))
1035 if (ifcvt_can_predicate (stmt
))
1037 gimple_set_plf (stmt
, GF_PLF_2
, true);
1038 need_to_predicate
= true;
1041 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1042 fprintf (dump_file
, "tree could trap...\n");
1045 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
1046 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
1047 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
))
1048 && arith_code_with_undefined_signed_overflow
1049 (gimple_assign_rhs_code (stmt
)))
1050 /* We have to rewrite stmts with undefined overflow. */
1051 need_to_rewrite_undefined
= true;
1053 /* When if-converting stores force versioning, likewise if we
1054 ended up generating store data races. */
1055 if (gimple_vdef (stmt
))
1056 need_to_predicate
= true;
1061 /* Return true when STMT is if-convertible.
1063 A statement is if-convertible if:
1064 - it is an if-convertible GIMPLE_ASSIGN,
1065 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1066 - it is builtins call. */
1069 if_convertible_stmt_p (gimple
*stmt
, vec
<data_reference_p
> refs
)
1071 switch (gimple_code (stmt
))
1079 return if_convertible_gimple_assign_stmt_p (stmt
, refs
);
1083 tree fndecl
= gimple_call_fndecl (stmt
);
1086 int flags
= gimple_call_flags (stmt
);
1087 if ((flags
& ECF_CONST
)
1088 && !(flags
& ECF_LOOPING_CONST_OR_PURE
)
1089 /* We can only vectorize some builtins at the moment,
1090 so restrict if-conversion to those. */
1091 && fndecl_built_in_p (fndecl
))
1098 /* Don't know what to do with 'em so don't do anything. */
1099 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1101 fprintf (dump_file
, "don't know what to do\n");
1102 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
1108 /* Assumes that BB has more than 1 predecessors.
1109 Returns false if at least one successor is not on critical edge
1110 and true otherwise. */
1113 all_preds_critical_p (basic_block bb
)
1118 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1119 if (EDGE_COUNT (e
->src
->succs
) == 1)
1124 /* Return true when BB is if-convertible. This routine does not check
1125 basic block's statements and phis.
1127 A basic block is not if-convertible if:
1128 - it is non-empty and it is after the exit block (in BFS order),
1129 - it is after the exit block but before the latch,
1130 - its edges are not normal.
1132 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1136 if_convertible_bb_p (class loop
*loop
, basic_block bb
, basic_block exit_bb
)
1141 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1142 fprintf (dump_file
, "----------[%d]-------------\n", bb
->index
);
1144 if (EDGE_COUNT (bb
->succs
) > 2)
1147 gimple
*last
= last_stmt (bb
);
1148 if (gcall
*call
= safe_dyn_cast
<gcall
*> (last
))
1149 if (gimple_call_ctrl_altering_p (call
))
1154 if (bb
!= loop
->latch
)
1156 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1157 fprintf (dump_file
, "basic block after exit bb but before latch\n");
1160 else if (!empty_block_p (bb
))
1162 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1163 fprintf (dump_file
, "non empty basic block after exit bb\n");
1166 else if (bb
== loop
->latch
1168 && !dominated_by_p (CDI_DOMINATORS
, bb
, exit_bb
))
1170 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1171 fprintf (dump_file
, "latch is not dominated by exit_block\n");
1176 /* Be less adventurous and handle only normal edges. */
1177 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1178 if (e
->flags
& (EDGE_EH
| EDGE_ABNORMAL
| EDGE_IRREDUCIBLE_LOOP
))
1180 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1181 fprintf (dump_file
, "Difficult to handle edges\n");
1188 /* Return true when all predecessor blocks of BB are visited. The
1189 VISITED bitmap keeps track of the visited blocks. */
1192 pred_blocks_visited_p (basic_block bb
, bitmap
*visited
)
1196 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1197 if (!bitmap_bit_p (*visited
, e
->src
->index
))
1203 /* Get body of a LOOP in suitable order for if-conversion. It is
1204 caller's responsibility to deallocate basic block list.
1205 If-conversion suitable order is, breadth first sort (BFS) order
1206 with an additional constraint: select a block only if all its
1207 predecessors are already selected. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Starts
   from get_loop_body_in_bfs_order and repeatedly appends blocks whose
   predecessors are all visited (or the loop header), bailing out (with
   cleanup) on irreducible-loop blocks or when no progress is possible.  */
1209 static basic_block
*
1210 get_loop_body_in_if_conv_order (const class loop
*loop
)
1212 basic_block
*blocks
, *blocks_in_bfs_order
;
1215 unsigned int index
= 0;
1216 unsigned int visited_count
= 0;
1218 gcc_assert (loop
->num_nodes
);
1219 gcc_assert (loop
->latch
!= EXIT_BLOCK_PTR_FOR_FN (cfun
));
1221 blocks
= XCNEWVEC (basic_block
, loop
->num_nodes
);
1222 visited
= BITMAP_ALLOC (NULL
);
1224 blocks_in_bfs_order
= get_loop_body_in_bfs_order (loop
);
1227 while (index
< loop
->num_nodes
)
1229 bb
= blocks_in_bfs_order
[index
];
/* Irreducible regions cannot be ordered this way: free and give up.  */
1231 if (bb
->flags
& BB_IRREDUCIBLE_LOOP
)
1233 free (blocks_in_bfs_order
);
1234 BITMAP_FREE (visited
);
1239 if (!bitmap_bit_p (visited
, bb
->index
))
/* Select BB only once all its predecessors were selected; the loop
   header is always selectable.  */
1241 if (pred_blocks_visited_p (bb
, &visited
)
1242 || bb
== loop
->header
)
1244 /* This block is now visited. */
1245 bitmap_set_bit (visited
, bb
->index
);
1246 blocks
[visited_count
++] = bb
;
/* No progress over a full sweep means the constraint is unsatisfiable:
   clean up.  */
1252 if (index
== loop
->num_nodes
1253 && visited_count
!= loop
->num_nodes
)
1257 free (blocks_in_bfs_order
);
1258 BITMAP_FREE (visited
);
1262 /* Returns true when the analysis of the predicates for all the basic
1263 blocks in LOOP succeeded.
1265 predicate_bbs first allocates the predicates of the basic blocks.
1266 These fields are then initialized with the tree expressions
1267 representing the predicates under which a basic block is executed
1268 in the LOOP. As the loop->header is executed at each iteration, it
1269 has the "true" predicate. Other statements executed under a
1270 condition are predicated with that condition, for example
1277 S1 will be predicated with "x", and
1278 S2 will be predicated with "!x". */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Walks
   ifc_bbs, derives each block's predicate from the GIMPLE_COND ending
   its predecessor, and propagates conditions along true/false edges.  */
1281 predicate_bbs (loop_p loop
)
1285 for (i
= 0; i
< loop
->num_nodes
; i
++)
1286 init_bb_predicate (ifc_bbs
[i
]);
1288 for (i
= 0; i
< loop
->num_nodes
; i
++)
1290 basic_block bb
= ifc_bbs
[i
];
1294 /* The loop latch and loop exit block are always executed and
1295 have no extra conditions to be processed: skip them. */
1296 if (bb
== loop
->latch
1297 || bb_with_exit_edge_p (loop
, bb
))
1299 reset_bb_predicate (bb
);
1303 cond
= bb_predicate (bb
);
1304 stmt
= last_stmt (bb
);
1305 if (stmt
&& gimple_code (stmt
) == GIMPLE_COND
)
1308 edge true_edge
, false_edge
;
1309 location_t loc
= gimple_location (stmt
);
1311 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1312 conditions can remain unfolded because of multiple uses so
1313 try to re-fold here, especially to get precision changing
1314 conversions sorted out. Do not simply fold the stmt since
1315 this is analysis only. When conditions were embedded in
1316 COND_EXPRs those were folded separately before folding the
1317 COND_EXPR but as they are now outside we have to make sure
1318 to fold them. Do it here - another opportunity would be to
1319 fold predicates as they are inserted. */
1320 gimple_match_op
cexpr (gimple_match_cond::UNCOND
,
1321 gimple_cond_code (stmt
),
1323 gimple_cond_lhs (stmt
),
1324 gimple_cond_rhs (stmt
));
/* Use the re-simplified comparison when it stays a comparison;
   otherwise rebuild the condition verbatim from the GIMPLE_COND.  */
1325 if (cexpr
.resimplify (NULL
, follow_all_ssa_edges
)
1326 && cexpr
.code
.is_tree_code ()
1327 && TREE_CODE_CLASS ((tree_code
)cexpr
.code
) == tcc_comparison
)
1328 c
= build2_loc (loc
, (tree_code
)cexpr
.code
, boolean_type_node
,
1329 cexpr
.ops
[0], cexpr
.ops
[1]);
1331 c
= build2_loc (loc
, gimple_cond_code (stmt
),
1333 gimple_cond_lhs (stmt
),
1334 gimple_cond_rhs (stmt
));
1336 /* Add new condition into destination's predicate list. */
1337 extract_true_false_edges_from_block (gimple_bb (stmt
),
1338 &true_edge
, &false_edge
);
1340 /* If C is true, then TRUE_EDGE is taken. */
1341 add_to_dst_predicate_list (loop
, true_edge
, unshare_expr (cond
),
1344 /* If C is false, then FALSE_EDGE is taken. */
1345 c2
= build1_loc (loc
, TRUTH_NOT_EXPR
, boolean_type_node
,
1347 add_to_dst_predicate_list (loop
, false_edge
,
1348 unshare_expr (cond
), c2
);
1353 /* If current bb has only one successor, then consider it as an
1354 unconditional goto. */
1355 if (single_succ_p (bb
))
1357 basic_block bb_n
= single_succ (bb
);
1359 /* The successor bb inherits the predicate of its
1360 predecessor. If there is no predicate in the predecessor
1361 bb, then consider the successor bb as always executed. */
1362 if (cond
== NULL_TREE
)
1363 cond
= boolean_true_node
;
1365 add_to_predicate_list (loop
, bb_n
, cond
);
1369 /* The loop header is always executed. */
1370 reset_bb_predicate (loop
->header
);
1371 gcc_assert (bb_predicate_gimplified_stmts (loop
->header
) == NULL
1372 && bb_predicate_gimplified_stmts (loop
->latch
) == NULL
);
1375 /* Build region by adding loop pre-header and post-header blocks. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Returns
   a vec<basic_block> of: preheader, then every block of ifc_bbs, then
   the (asserted to exist) loop post-header found via an exit edge.  */
1377 static vec
<basic_block
>
1378 build_region (class loop
*loop
)
1380 vec
<basic_block
> region
= vNULL
;
1381 basic_block exit_bb
= NULL
;
1383 gcc_assert (ifc_bbs
);
1384 /* The first element is loop pre-header. */
1385 region
.safe_push (loop_preheader_edge (loop
)->src
);
1387 for (unsigned int i
= 0; i
< loop
->num_nodes
; i
++)
1389 basic_block bb
= ifc_bbs
[i
];
1390 region
.safe_push (bb
);
1391 /* Find loop postheader. */
1394 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1395 if (loop_exit_edge_p (loop
, e
))
1401 /* The last element is loop post-header. */
1402 gcc_assert (exit_bb
);
1403 region
.safe_push (exit_bb
);
1407 /* Return true when LOOP is if-convertible. This is a helper function
1408 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1409 in if_convertible_loop_p. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Visible
   phases: per-block structural checks (if_convertible_bb_p), uid reset
   per statement, DR-map allocation, region-local post-dominator
   computation around predicate_bbs, per-DR ifc_dr initialization and
   read/write-info hashing, statement checks in predicated blocks, then
   PHI checks last (masked load/store presence affects them).  */
1412 if_convertible_loop_p_1 (class loop
*loop
, vec
<data_reference_p
> *refs
)
1415 basic_block exit_bb
= NULL
;
1416 vec
<basic_block
> region
;
1418 calculate_dominance_info (CDI_DOMINATORS
);
1420 for (i
= 0; i
< loop
->num_nodes
; i
++)
1422 basic_block bb
= ifc_bbs
[i
];
1424 if (!if_convertible_bb_p (loop
, bb
, exit_bb
))
1427 if (bb_with_exit_edge_p (loop
, bb
))
1431 for (i
= 0; i
< loop
->num_nodes
; i
++)
1433 basic_block bb
= ifc_bbs
[i
];
1434 gimple_stmt_iterator gsi
;
1436 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1437 switch (gimple_code (gsi_stmt (gsi
)))
1444 gimple_set_uid (gsi_stmt (gsi
), 0);
1451 data_reference_p dr
;
/* Fresh hash maps used to detect DRs with identical innermost
   behavior / base reference.  */
1454 = new hash_map
<innermost_loop_behavior_hash
, data_reference_p
>;
1455 baseref_DR_map
= new hash_map
<tree_operand_hash
, data_reference_p
>;
1457 /* Compute post-dominator tree locally. */
1458 region
= build_region (loop
);
1459 calculate_dominance_info_for_region (CDI_POST_DOMINATORS
, region
);
1461 predicate_bbs (loop
);
1463 /* Free post-dominator tree since it is not used after predication. */
1464 free_dominance_info_for_region (cfun
, CDI_POST_DOMINATORS
, region
);
/* Initialize per-DR if-conversion state (predicates start false).  */
1467 for (i
= 0; refs
->iterate (i
, &dr
); i
++)
1469 tree ref
= DR_REF (dr
);
1471 dr
->aux
= XNEW (struct ifc_dr
);
1472 DR_BASE_W_UNCONDITIONALLY (dr
) = false;
1473 DR_RW_UNCONDITIONALLY (dr
) = false;
1474 DR_W_UNCONDITIONALLY (dr
) = false;
1475 IFC_DR (dr
)->rw_predicate
= boolean_false_node
;
1476 IFC_DR (dr
)->w_predicate
= boolean_false_node
;
1477 IFC_DR (dr
)->base_w_predicate
= boolean_false_node
;
1478 if (gimple_uid (DR_STMT (dr
)) == 0)
1479 gimple_set_uid (DR_STMT (dr
), i
+ 1);
1481 /* If DR doesn't have innermost loop behavior or it's a compound
1482 memory reference, we synthesize its innermost loop behavior
1484 if (TREE_CODE (ref
) == COMPONENT_REF
1485 || TREE_CODE (ref
) == IMAGPART_EXPR
1486 || TREE_CODE (ref
) == REALPART_EXPR
1487 || !(DR_BASE_ADDRESS (dr
) || DR_OFFSET (dr
)
1488 || DR_INIT (dr
) || DR_STEP (dr
)))
/* Strip component/part selectors down to the underlying object.  */
1490 while (TREE_CODE (ref
) == COMPONENT_REF
1491 || TREE_CODE (ref
) == IMAGPART_EXPR
1492 || TREE_CODE (ref
) == REALPART_EXPR
)
1493 ref
= TREE_OPERAND (ref
, 0);
1495 memset (&DR_INNERMOST (dr
), 0, sizeof (DR_INNERMOST (dr
)));
1496 DR_BASE_ADDRESS (dr
) = ref
;
1498 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr
);
1501 for (i
= 0; i
< loop
->num_nodes
; i
++)
1503 basic_block bb
= ifc_bbs
[i
];
1504 gimple_stmt_iterator itr
;
1506 /* Check the if-convertibility of statements in predicated BBs. */
1507 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
1508 for (itr
= gsi_start_bb (bb
); !gsi_end_p (itr
); gsi_next (&itr
))
1509 if (!if_convertible_stmt_p (gsi_stmt (itr
), *refs
))
1513 /* Checking PHIs needs to be done after stmts, as the fact whether there
1514 are any masked loads or stores affects the tests. */
1515 for (i
= 0; i
< loop
->num_nodes
; i
++)
1517 basic_block bb
= ifc_bbs
[i
];
1520 for (itr
= gsi_start_phis (bb
); !gsi_end_p (itr
); gsi_next (&itr
))
1521 if (!if_convertible_phi_p (loop
, bb
, itr
.phi ()))
1526 fprintf (dump_file
, "Applying if-conversion\n");
1531 /* Return true when LOOP is if-convertible.
1532 LOOP is if-convertible if:
1534 - it has two or more basic blocks,
1535 - it has only one exit,
1536 - loop header is not the exit edge,
1537 - if its basic blocks and phi nodes are if convertible. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Fast
   structural rejections first (each dumped under TDF_DETAILS), then
   delegates to if_convertible_loop_p_1 and tears down the DR maps.  */
1540 if_convertible_loop_p (class loop
*loop
, vec
<data_reference_p
> *refs
)
1546 /* Handle only innermost loop. */
1547 if (!loop
|| loop
->inner
)
1549 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1550 fprintf (dump_file
, "not innermost loop\n");
1554 /* If only one block, no need for if-conversion. */
1555 if (loop
->num_nodes
<= 2)
1557 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1558 fprintf (dump_file
, "less than 2 basic blocks\n");
1562 /* More than one loop exit is too much to handle. */
1563 if (!single_exit (loop
))
1565 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1566 fprintf (dump_file
, "multiple exits\n");
1570 /* If one of the loop header's edge is an exit edge then do not
1571 apply if-conversion. */
1572 FOR_EACH_EDGE (e
, ei
, loop
->header
->succs
)
1573 if (loop_exit_edge_p (loop
, e
))
1576 res
= if_convertible_loop_p_1 (loop
, refs
);
/* The maps were allocated in if_convertible_loop_p_1; delete them
   unconditionally and clear the globals.  */
1578 delete innermost_DR_map
;
1579 innermost_DR_map
= NULL
;
1581 delete baseref_DR_map
;
1582 baseref_DR_map
= NULL
;
1587 /* Return reduc_1 if has_nop.
1590 tmp1 = (unsigned type) reduc_1;
1592 reduc_3 = (signed type) tmp2. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Peels a
   no-op conversion off OP: requires OP to be an SSA_NAME defined by a
   CONVERT-class gassign whose conversion is a nop, and returns the
   converted operand; early exits (not visible here) presumably return
   OP itself — TODO confirm against upstream.  */
1594 strip_nop_cond_scalar_reduction (bool has_nop
, tree op
)
1599 if (TREE_CODE (op
) != SSA_NAME
)
1602 gassign
*stmt
= safe_dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (op
));
1604 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt
))
1605 || !tree_nop_conversion_p (TREE_TYPE (op
), TREE_TYPE
1606 (gimple_assign_rhs1 (stmt
))))
1609 return gimple_assign_rhs1 (stmt
);
1612 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1613 which is in predicated basic block.
1614 In fact, the following PHI pattern is searching:
1616 reduc_1 = PHI <..., reduc_2>
1620 reduc_2 = PHI <reduc_1, reduc_3>
1622 ARG_0 and ARG_1 are correspondent PHI arguments.
1623 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1624 EXTENDED is true if PHI has > 2 arguments. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Visible
   structure: locate the header PHI feeding one argument and the
   candidate reduction statement defining the other; validate placement
   (inside the loop, predicated, predecessor of the PHI block), single
   use, an optional nop-conversion wrapper, an allowed reduction opcode,
   and that the reduction variable and its stripped form are used only
   by the reduction/nop statement and the PHIs.  */
1627 is_cond_scalar_reduction (gimple
*phi
, gimple
**reduc
, tree arg_0
, tree arg_1
,
1628 tree
*op0
, tree
*op1
, bool extended
, bool* has_nop
,
1631 tree lhs
, r_op1
, r_op2
, r_nop1
, r_nop2
;
1633 gimple
*header_phi
= NULL
;
1634 enum tree_code reduction_op
;
1635 basic_block bb
= gimple_bb (phi
);
1636 class loop
*loop
= bb
->loop_father
;
1637 edge latch_e
= loop_latch_edge (loop
);
1638 imm_use_iterator imm_iter
;
1639 use_operand_p use_p
;
1642 bool result
= *has_nop
= false;
1643 if (TREE_CODE (arg_0
) != SSA_NAME
|| TREE_CODE (arg_1
) != SSA_NAME
)
/* Decide which argument comes from the loop-header PHI and which from
   the candidate reduction statement.  */
1646 if (!extended
&& gimple_code (SSA_NAME_DEF_STMT (arg_0
)) == GIMPLE_PHI
)
1649 header_phi
= SSA_NAME_DEF_STMT (arg_0
);
1650 stmt
= SSA_NAME_DEF_STMT (arg_1
);
1652 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1
)) == GIMPLE_PHI
)
1655 header_phi
= SSA_NAME_DEF_STMT (arg_1
);
1656 stmt
= SSA_NAME_DEF_STMT (arg_0
);
1660 if (gimple_bb (header_phi
) != loop
->header
)
1663 if (PHI_ARG_DEF_FROM_EDGE (header_phi
, latch_e
) != PHI_RESULT (phi
))
1666 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1667 || gimple_has_volatile_ops (stmt
))
1670 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
1673 if (!is_predicated (gimple_bb (stmt
)))
1676 /* Check that stmt-block is predecessor of phi-block. */
1677 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->succs
)
1686 if (!has_single_use (lhs
))
1689 reduction_op
= gimple_assign_rhs_code (stmt
);
1691 /* Catch something like below
1694 reduc_1 = PHI <..., reduc_2>
1697 tmp1 = (unsigned type) reduc_1;
1699 reduc_3 = (signed type) tmp2;
1701 reduc_2 = PHI <reduc_1, reduc_3>
1705 reduc_2 = PHI <0, reduc_3>
1706 tmp1 = (unsigned type)reduce_1;
1707 ifcvt = cond_expr ? rhs2 : 0
1708 tmp2 = tmp1 +/- ifcvt;
1709 reduce_1 = (signed type)tmp2; */
1711 if (CONVERT_EXPR_CODE_P (reduction_op
))
1713 lhs
= gimple_assign_rhs1 (stmt
);
1714 if (TREE_CODE (lhs
) != SSA_NAME
1715 || !has_single_use (lhs
))
1719 stmt
= SSA_NAME_DEF_STMT (lhs
);
1720 if (gimple_bb (stmt
) != gimple_bb (*nop_reduc
)
1721 || !is_gimple_assign (stmt
))
1725 reduction_op
= gimple_assign_rhs_code (stmt
);
/* Only these opcodes are treated as conditional scalar reductions.  */
1728 if (reduction_op
!= PLUS_EXPR
1729 && reduction_op
!= MINUS_EXPR
1730 && reduction_op
!= MULT_EXPR
1731 && reduction_op
!= BIT_IOR_EXPR
1732 && reduction_op
!= BIT_XOR_EXPR
1733 && reduction_op
!= BIT_AND_EXPR
)
1735 r_op1
= gimple_assign_rhs1 (stmt
);
1736 r_op2
= gimple_assign_rhs2 (stmt
);
1738 r_nop1
= strip_nop_cond_scalar_reduction (*has_nop
, r_op1
);
1739 r_nop2
= strip_nop_cond_scalar_reduction (*has_nop
, r_op2
);
1741 /* Make R_OP1 to hold reduction variable. */
1742 if (r_nop2
== PHI_RESULT (header_phi
)
1743 && commutative_tree_code (reduction_op
))
1745 std::swap (r_op1
, r_op2
);
1746 std::swap (r_nop1
, r_nop2
);
1748 else if (r_nop1
!= PHI_RESULT (header_phi
))
1753 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1754 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, r_nop1
)
1756 gimple
*use_stmt
= USE_STMT (use_p
);
1757 if (is_gimple_debug (use_stmt
))
1759 if (use_stmt
== SSA_NAME_DEF_STMT (r_op1
))
1761 if (use_stmt
!= phi
)
1766 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1767 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, r_op1
)
1769 gimple
*use_stmt
= USE_STMT (use_p
);
1770 if (is_gimple_debug (use_stmt
))
1772 if (use_stmt
== stmt
)
1774 if (gimple_code (use_stmt
) != GIMPLE_PHI
)
/* Success: report operands to the caller (R_OP1 holds the reduction
   variable per the swap above).  */
1778 *op0
= r_op1
; *op1
= r_op2
;
1783 /* Converts conditional scalar reduction into unconditional form, e.g.
1785 if (_5 != 0) goto bb_5 else goto bb_6
1791 # res_2 = PHI <res_13(4), res_6(5)>
1794 will be converted into sequence
1795 _ifc__1 = _5 != 0 ? 1 : 0;
1796 res_2 = res_13 + _ifc__1;
1797 Argument SWAP tells that arguments of conditional expression should be
1799 Returns rhs of resulting PHI assignment. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Builds a
   COND_EXPR selecting between the reduction operand and the neutral
   element (neutral_op_for_reduction), emits the unconditional reduction,
   optionally re-applies the nop conversion, and removes the original
   (and nop) reduction statements.  */
1802 convert_scalar_cond_reduction (gimple
*reduc
, gimple_stmt_iterator
*gsi
,
1803 tree cond
, tree op0
, tree op1
, bool swap
,
1804 bool has_nop
, gimple
* nop_reduc
)
1806 gimple_stmt_iterator stmt_it
;
1809 tree rhs1
= gimple_assign_rhs1 (reduc
);
1810 tree tmp
= make_temp_ssa_name (TREE_TYPE (rhs1
), NULL
, "_ifc_");
1812 enum tree_code reduction_op
= gimple_assign_rhs_code (reduc
);
1813 tree op_nochange
= neutral_op_for_reduction (TREE_TYPE (rhs1
), reduction_op
, NULL
);
1814 gimple_seq stmts
= NULL
;
1816 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1818 fprintf (dump_file
, "Found cond scalar reduction.\n");
1819 print_gimple_stmt (dump_file
, reduc
, 0, TDF_SLIM
);
1822 /* Build cond expression using COND and constant operand
1823 of reduction rhs. */
1824 c
= fold_build_cond_expr (TREE_TYPE (rhs1
),
1825 unshare_expr (cond
),
1826 swap
? op_nochange
: op1
,
1827 swap
? op1
: op_nochange
);
1829 /* Create assignment stmt and insert it at GSI. */
1830 new_assign
= gimple_build_assign (tmp
, c
);
1831 gsi_insert_before (gsi
, new_assign
, GSI_SAME_STMT
);
1832 /* Build rhs for unconditional increment/decrement/logic_operation. */
1833 rhs
= gimple_build (&stmts
, reduction_op
,
1834 TREE_TYPE (rhs1
), op0
, tmp
);
/* When the reduction was wrapped in a nop conversion, convert the
   result back and drop the now-dead nop statement.  */
1838 rhs
= gimple_convert (&stmts
,
1839 TREE_TYPE (gimple_assign_lhs (nop_reduc
)), rhs
);
1840 stmt_it
= gsi_for_stmt (nop_reduc
);
1841 gsi_remove (&stmt_it
, true);
1842 release_defs (nop_reduc
);
1844 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
1846 /* Delete original reduction stmt. */
1847 stmt_it
= gsi_for_stmt (reduc
);
1848 gsi_remove (&stmt_it
, true);
1849 release_defs (reduc
);
1853 /* Produce condition for all occurrences of ARG in PHI node. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  ORs
   together the predicates of the source blocks of all PHI edges whose
   indexes are listed in OCCUR, gimplifying each partial result at GSI;
   asserts a non-NULL condition at the end.  */
1856 gen_phi_arg_condition (gphi
*phi
, vec
<int> *occur
,
1857 gimple_stmt_iterator
*gsi
)
1861 tree cond
= NULL_TREE
;
1865 len
= occur
->length ();
1866 gcc_assert (len
> 0);
1867 for (i
= 0; i
< len
; i
++)
1869 e
= gimple_phi_arg_edge (phi
, (*occur
)[i
]);
1870 c
= bb_predicate (e
->src
);
/* A true predicate dominates the OR: no need to accumulate further.  */
1871 if (is_true_predicate (c
))
1876 c
= force_gimple_operand_gsi (gsi
, unshare_expr (c
),
1877 true, NULL_TREE
, true, GSI_SAME_STMT
);
1878 if (cond
!= NULL_TREE
)
1880 /* Must build OR expression. */
1881 cond
= fold_or_predicates (EXPR_LOCATION (c
), c
, cond
);
1882 cond
= force_gimple_operand_gsi (gsi
, unshare_expr (cond
), true,
1883 NULL_TREE
, true, GSI_SAME_STMT
);
1888 gcc_assert (cond
!= NULL_TREE
);
1892 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1893 This routine can handle PHI nodes with more than two arguments.
1896 S1: A = PHI <x1(1), x2(5)>
1898 S2: A = cond ? x1 : x2;
1900 The generated code is inserted at GSI that points to the top of
1901 basic block's statement list.
1902 If PHI node has more than two arguments a chain of conditional
1903 expression is produced. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Visible
   cases: (1) degenerate/SCEV-constant PHI becomes a plain copy; (2)
   two-predecessor PHI becomes one COND_EXPR, with special handling for
   conditional scalar reductions; (3) multi-argument PHI groups equal
   arguments by value in phi_arg_map and emits a chain of COND_EXPRs,
   with a shortcut when only two distinct values exist and one occurs
   exactly once.  */
1907 predicate_scalar_phi (gphi
*phi
, gimple_stmt_iterator
*gsi
)
1909 gimple
*new_stmt
= NULL
, *reduc
, *nop_reduc
;
1910 tree rhs
, res
, arg0
, arg1
, op0
, op1
, scev
;
1912 unsigned int index0
;
1913 unsigned int max
, args_len
;
1919 res
= gimple_phi_result (phi
);
1920 if (virtual_operand_p (res
))
/* Case 1: degenerate PHI (all args equal, or SCEV-determined).  */
1923 if ((rhs
= degenerate_phi_result (phi
))
1924 || ((scev
= analyze_scalar_evolution (gimple_bb (phi
)->loop_father
,
1926 && !chrec_contains_undetermined (scev
)
1928 && (rhs
= gimple_phi_arg_def (phi
, 0))))
1930 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1932 fprintf (dump_file
, "Degenerate phi!\n");
1933 print_gimple_stmt (dump_file
, phi
, 0, TDF_SLIM
);
1935 new_stmt
= gimple_build_assign (res
, rhs
);
1936 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
1937 update_stmt (new_stmt
);
1941 bb
= gimple_bb (phi
);
/* Case 2: ordinary two-predecessor PHI.  */
1942 if (EDGE_COUNT (bb
->preds
) == 2)
1944 /* Predicate ordinary PHI node with 2 arguments. */
1945 edge first_edge
, second_edge
;
1946 basic_block true_bb
;
1947 first_edge
= EDGE_PRED (bb
, 0);
1948 second_edge
= EDGE_PRED (bb
, 1);
1949 cond
= bb_predicate (first_edge
->src
);
/* Normalize so FIRST_EDGE carries the non-negated predicate.  */
1950 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
1951 std::swap (first_edge
, second_edge
);
1952 if (EDGE_COUNT (first_edge
->src
->succs
) > 1)
1954 cond
= bb_predicate (second_edge
->src
);
1955 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
1956 cond
= TREE_OPERAND (cond
, 0);
1958 first_edge
= second_edge
;
1961 cond
= bb_predicate (first_edge
->src
);
1962 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1963 cond
= force_gimple_operand_gsi (gsi
, unshare_expr (cond
), true,
1964 NULL_TREE
, true, GSI_SAME_STMT
);
1965 true_bb
= first_edge
->src
;
/* Order ARG0/ARG1 so ARG0 is the value flowing from TRUE_BB.  */
1966 if (EDGE_PRED (bb
, 1)->src
== true_bb
)
1968 arg0
= gimple_phi_arg_def (phi
, 1);
1969 arg1
= gimple_phi_arg_def (phi
, 0);
1973 arg0
= gimple_phi_arg_def (phi
, 0);
1974 arg1
= gimple_phi_arg_def (phi
, 1);
1976 if (is_cond_scalar_reduction (phi
, &reduc
, arg0
, arg1
,
1977 &op0
, &op1
, false, &has_nop
,
1980 /* Convert reduction stmt into vectorizable form. */
1981 rhs
= convert_scalar_cond_reduction (reduc
, gsi
, cond
, op0
, op1
,
1982 true_bb
!= gimple_bb (reduc
),
1983 has_nop
, nop_reduc
);
1984 redundant_ssa_names
.safe_push (std::make_pair (res
, rhs
));
1987 /* Build new RHS using selected condition and arguments. */
1988 rhs
= fold_build_cond_expr (TREE_TYPE (res
), unshare_expr (cond
),
1990 new_stmt
= gimple_build_assign (res
, rhs
);
1991 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
1992 gimple_stmt_iterator new_gsi
= gsi_for_stmt (new_stmt
);
1993 if (fold_stmt (&new_gsi
, follow_all_ssa_edges
))
1995 new_stmt
= gsi_stmt (new_gsi
);
1996 update_stmt (new_stmt
);
1999 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2001 fprintf (dump_file
, "new phi replacement stmt\n");
2002 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
/* Case 3: PHI with more than two arguments.  */
2007 /* Create hashmap for PHI node which contain vector of argument indexes
2008 having the same value. */
2010 hash_map
<tree_operand_hash
, auto_vec
<int> > phi_arg_map
;
2011 unsigned int num_args
= gimple_phi_num_args (phi
);
2013 /* Vector of different PHI argument values. */
2014 auto_vec
<tree
> args (num_args
);
2016 /* Compute phi_arg_map. */
2017 for (i
= 0; i
< num_args
; i
++)
2021 arg
= gimple_phi_arg_def (phi
, i
);
2022 if (!phi_arg_map
.get (arg
))
2023 args
.quick_push (arg
);
2024 phi_arg_map
.get_or_insert (arg
).safe_push (i
);
2027 /* Determine element with max number of occurrences. */
2030 args_len
= args
.length ();
2031 for (i
= 0; i
< args_len
; i
++)
2034 if ((len
= phi_arg_map
.get (args
[i
])->length ()) > max
)
2041 /* Put element with max number of occurences to the end of ARGS. */
2042 if (max_ind
!= -1 && max_ind
+1 != (int) args_len
)
2043 std::swap (args
[args_len
- 1], args
[max_ind
]);
2045 /* Handle one special case when number of arguments with different values
2046 is equal 2 and one argument has the only occurrence. Such PHI can be
2047 handled as if would have only 2 arguments. */
2048 if (args_len
== 2 && phi_arg_map
.get (args
[0])->length () == 1)
2051 indexes
= phi_arg_map
.get (args
[0]);
2052 index0
= (*indexes
)[0];
2055 e
= gimple_phi_arg_edge (phi
, index0
);
2056 cond
= bb_predicate (e
->src
);
2057 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
2060 cond
= TREE_OPERAND (cond
, 0);
2062 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2063 cond
= force_gimple_operand_gsi (gsi
, unshare_expr (cond
), true,
2064 NULL_TREE
, true, GSI_SAME_STMT
);
2065 if (!(is_cond_scalar_reduction (phi
, &reduc
, arg0
, arg1
,
2066 &op0
, &op1
, true, &has_nop
, &nop_reduc
)))
2067 rhs
= fold_build_cond_expr (TREE_TYPE (res
), unshare_expr (cond
),
2072 /* Convert reduction stmt into vectorizable form. */
2073 rhs
= convert_scalar_cond_reduction (reduc
, gsi
, cond
, op0
, op1
,
2074 swap
,has_nop
, nop_reduc
);
2075 redundant_ssa_names
.safe_push (std::make_pair (res
, rhs
));
2077 new_stmt
= gimple_build_assign (res
, rhs
);
2078 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
2079 update_stmt (new_stmt
);
/* General multi-argument case: emit a chain of COND_EXPR temporaries,
   one per distinct argument value.  */
2085 tree type
= TREE_TYPE (gimple_phi_result (phi
));
2088 for (i
= 0; i
< args_len
; i
++)
2091 indexes
= phi_arg_map
.get (args
[i
]);
2092 if (i
!= args_len
- 1)
2093 lhs
= make_temp_ssa_name (type
, NULL
, "_ifc_");
2096 cond
= gen_phi_arg_condition (phi
, indexes
, gsi
);
2097 rhs
= fold_build_cond_expr (type
, unshare_expr (cond
),
2099 new_stmt
= gimple_build_assign (lhs
, rhs
);
2100 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
2101 update_stmt (new_stmt
);
2106 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2108 fprintf (dump_file
, "new extended phi replacement stmt\n");
2109 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
2113 /* Replaces in LOOP all the scalar phi nodes other than those in the
2114 LOOP->header block with conditional modify expressions. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Iterates
   non-header blocks; for each real (non-virtual) PHI, emits the
   replacement via predicate_scalar_phi at the block's first non-label
   position and removes the PHI node.  Virtual PHIs are skipped.  */
2117 predicate_all_scalar_phis (class loop
*loop
)
2120 unsigned int orig_loop_num_nodes
= loop
->num_nodes
;
2123 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2126 gimple_stmt_iterator gsi
;
2127 gphi_iterator phi_gsi
;
2130 if (bb
== loop
->header
)
2133 phi_gsi
= gsi_start_phis (bb
);
2134 if (gsi_end_p (phi_gsi
))
2137 gsi
= gsi_after_labels (bb
);
2138 while (!gsi_end_p (phi_gsi
))
2140 phi
= phi_gsi
.phi ();
2141 if (virtual_operand_p (gimple_phi_result (phi
)))
2142 gsi_next (&phi_gsi
);
2145 predicate_scalar_phi (phi
, &gsi
);
2146 remove_phi_node (&phi_gsi
, false);
2152 /* Insert in each basic block of LOOP the statements produced by the
2153 gimplification of the predicates. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  For each
   predicated block: when need_to_predicate is set the gimplified
   predicate goes right after the labels (memory-write predication will
   consume it); otherwise it goes at the block's end to reduce register
   pressure.  Unpredicated blocks just get their predicate reset.  */
2156 insert_gimplified_predicates (loop_p loop
)
2160 for (i
= 0; i
< loop
->num_nodes
; i
++)
2162 basic_block bb
= ifc_bbs
[i
];
2164 if (!is_predicated (bb
))
2165 gcc_assert (bb_predicate_gimplified_stmts (bb
) == NULL
);
2166 if (!is_predicated (bb
))
2168 /* Do not insert statements for a basic block that is not
2169 predicated. Also make sure that the predicate of the
2170 basic block is set to true. */
2171 reset_bb_predicate (bb
);
2175 stmts
= bb_predicate_gimplified_stmts (bb
);
2178 if (need_to_predicate
)
2180 /* Insert the predicate of the BB just after the label,
2181 as the if-conversion of memory writes will use this
2183 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
2184 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2188 /* Insert the predicate of the BB at the end of the BB
2189 as this would reduce the register pressure: the only
2190 use of this predicate will be in successor BBs. */
2191 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
/* Keep the sequence before any block-ending statement.  */
2194 || stmt_ends_bb_p (gsi_stmt (gsi
)))
2195 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2197 gsi_insert_seq_after (&gsi
, stmts
, GSI_SAME_STMT
);
2200 /* Once the sequence is code generated, set it to NULL. */
2201 set_bb_predicate_gimplified_stmts (bb
, NULL
);
2206 /* Helper function for predicate_statements. Returns index of existent
2207 mask if it was created for given SIZE and -1 otherwise. */
/* NOTE(review): lossy extraction — the comparison against SIZE and the
   returns are not visible; tokens kept byte-identical.  Linear scan of
   the recorded mask sizes via FOR_EACH_VEC_ELT.  */
2210 mask_exists (int size
, const vec
<int> &vec
)
2214 FOR_EACH_VEC_ELT (vec
, ix
, v
)
2220 /* Helper function for predicate_statements. STMT is a memory read or
2221 write and it needs to be predicated by MASK. Return a statement
/* NOTE(review): lossy extraction; tokens kept byte-identical.  An
   SSA_NAME lhs means STMT is a load (rebuilt as IFN_MASK_LOAD);
   otherwise it is a store (rebuilt as IFN_MASK_STORE).  The address is
   gimplified at GSI, the alignment is encoded in an alias-pointer
   constant, points-to info is copied when available, virtual operands
   are carried over, and the call is marked nothrow.  */
2225 predicate_load_or_store (gimple_stmt_iterator
*gsi
, gassign
*stmt
, tree mask
)
2229 tree lhs
= gimple_assign_lhs (stmt
);
2230 tree rhs
= gimple_assign_rhs1 (stmt
);
/* REF is the memory side of the assignment (rhs for a load, lhs for a
   store).  */
2231 tree ref
= TREE_CODE (lhs
) == SSA_NAME
? rhs
: lhs
;
2232 mark_addressable (ref
);
2233 tree addr
= force_gimple_operand_gsi (gsi
, build_fold_addr_expr (ref
),
2234 true, NULL_TREE
, true, GSI_SAME_STMT
);
2235 tree ptr
= build_int_cst (reference_alias_ptr_type (ref
),
2236 get_object_alignment (ref
));
2237 /* Copy points-to info if possible. */
2238 if (TREE_CODE (addr
) == SSA_NAME
&& !SSA_NAME_PTR_INFO (addr
))
2239 copy_ref_info (build2 (MEM_REF
, TREE_TYPE (ref
), addr
, ptr
),
2241 if (TREE_CODE (lhs
) == SSA_NAME
)
2244 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, addr
,
2246 gimple_call_set_lhs (new_stmt
, lhs
);
2247 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
2252 = gimple_build_call_internal (IFN_MASK_STORE
, 4, addr
, ptr
,
2254 gimple_move_vops (new_stmt
, stmt
);
2256 gimple_call_set_nothrow (new_stmt
, true);
2260 /* STMT uses OP_LHS. Check whether it is equivalent to:
2262 ... = OP_MASK ? OP_LHS : X;
2264 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2265 known to have value OP_COND. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Matches
   a COND_EXPR gassign whose condition equals OP_MASK/OP_COND with
   OP_LHS on the true arm, or whose condition is the inverse of OP_COND
   with OP_LHS on the false arm; the other arm is the X to return.  */
2268 check_redundant_cond_expr (gimple
*stmt
, tree op_mask
, tree op_cond
,
2271 gassign
*assign
= dyn_cast
<gassign
*> (stmt
);
2272 if (!assign
|| gimple_assign_rhs_code (assign
) != COND_EXPR
)
2275 tree use_cond
= gimple_assign_rhs1 (assign
);
2276 tree if_true
= gimple_assign_rhs2 (assign
);
2277 tree if_false
= gimple_assign_rhs3 (assign
);
2279 if ((use_cond
== op_mask
|| operand_equal_p (use_cond
, op_cond
, 0))
2280 && if_true
== op_lhs
)
2283 if (inverse_conditions_p (use_cond
, op_cond
) && if_false
== op_lhs
)
2289 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2290 the set of SSA names defined earlier in STMT's block. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.
   Invariants and default-def SSA names are always available; otherwise
   a same-block definition must already be in SSA_NAMES, and a
   cross-block definition must dominate STMT's block.  */
2293 value_available_p (gimple
*stmt
, hash_set
<tree_ssa_name_hash
> *ssa_names
,
2296 if (is_gimple_min_invariant (value
))
2299 if (TREE_CODE (value
) == SSA_NAME
)
2301 if (SSA_NAME_IS_DEFAULT_DEF (value
))
2304 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (value
));
2305 basic_block use_bb
= gimple_bb (stmt
);
2306 return (def_bb
== use_bb
2307 ? ssa_names
->contains (value
)
2308 : dominated_by_p (CDI_DOMINATORS
, use_bb
, def_bb
));
2314 /* Helper function for predicate_statements. STMT is a potentially-trapping
2315 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2316 has value COND. Return a statement that does so. SSA_NAMES is the set of
2317 SSA names defined earlier in STMT's block. */
/* NOTE(review): lossy extraction; tokens kept byte-identical.  Rewrites
   STMT as a call to the conditional internal function for its rhs code
   (get_conditional_internal_fn), choosing the ELSE value from a
   redundant COND_EXPR use when one is available at STMT, else the
   target's preferred else value; redundant uses are queued in
   redundant_ssa_names for later replacement.  */
2320 predicate_rhs_code (gassign
*stmt
, tree mask
, tree cond
,
2321 hash_set
<tree_ssa_name_hash
> *ssa_names
)
2323 tree lhs
= gimple_assign_lhs (stmt
);
2324 tree_code code
= gimple_assign_rhs_code (stmt
);
2325 unsigned int nops
= gimple_num_ops (stmt
);
2326 internal_fn cond_fn
= get_conditional_internal_fn (code
);
2328 /* Construct the arguments to the conditional internal function. */
2329 auto_vec
<tree
, 8> args
;
2330 args
.safe_grow (nops
+ 1, true);
2332 for (unsigned int i
= 1; i
< nops
; ++i
)
2333 args
[i
] = gimple_op (stmt
, i
);
/* Slot NOPS is the ELSE value; NULL until one is chosen.  */
2334 args
[nops
] = NULL_TREE
;
2336 /* Look for uses of the result to see whether they are COND_EXPRs that can
2337 be folded into the conditional call. */
2338 imm_use_iterator imm_iter
;
2340 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, lhs
)
2342 tree new_else
= check_redundant_cond_expr (use_stmt
, mask
, cond
, lhs
);
2343 if (new_else
&& value_available_p (stmt
, ssa_names
, new_else
))
2346 args
[nops
] = new_else
;
2347 if (operand_equal_p (new_else
, args
[nops
], 0))
2351 LHS = IFN_COND (MASK, ..., ELSE);
2352 X = MASK ? LHS : ELSE;
2354 which makes X equivalent to LHS. */
2355 tree use_lhs
= gimple_assign_lhs (use_stmt
);
2356 redundant_ssa_names
.safe_push (std::make_pair (use_lhs
, lhs
));
/* No suitable use found: fall back to the target's preferred else.  */
2361 args
[nops
] = targetm
.preferred_else_value (cond_fn
, TREE_TYPE (lhs
),
2362 nops
- 1, &args
[1]);
2364 /* Create and insert the call. */
2365 gcall
*new_stmt
= gimple_build_call_internal_vec (cond_fn
, args
);
2366 gimple_call_set_lhs (new_stmt
, lhs
);
2367 gimple_call_set_nothrow (new_stmt
, true);
2372 /* Predicate each write to memory in LOOP.
2374 This function transforms control flow constructs containing memory
2377 | for (i = 0; i < N; i++)
2381 into the following form that does not contain control flow:
2383 | for (i = 0; i < N; i++)
2384 | A[i] = cond ? expr : A[i];
2386 The original CFG looks like this:
2393 | if (i < N) goto bb_5 else goto bb_2
2397 | cond = some_computation;
2398 | if (cond) goto bb_3 else goto bb_4
2410 insert_gimplified_predicates inserts the computation of the COND
2411 expression at the beginning of the destination basic block:
2418 | if (i < N) goto bb_5 else goto bb_2
2422 | cond = some_computation;
2423 | if (cond) goto bb_3 else goto bb_4
2427 | cond = some_computation;
2436 predicate_statements is then predicating the memory write as follows:
2443 | if (i < N) goto bb_5 else goto bb_2
2447 | if (cond) goto bb_3 else goto bb_4
2451 | cond = some_computation;
2452 | A[i] = cond ? expr : A[i];
2460 and finally combine_blocks removes the basic block boundaries making
2461 the loop vectorizable:
2465 | if (i < N) goto bb_5 else goto bb_1
2469 | cond = some_computation;
2470 | A[i] = cond ? expr : A[i];
2471 | if (i < N) goto bb_5 else goto bb_4
2480 predicate_statements (loop_p loop
)
2482 unsigned int i
, orig_loop_num_nodes
= loop
->num_nodes
;
2483 auto_vec
<int, 1> vect_sizes
;
2484 auto_vec
<tree
, 1> vect_masks
;
2485 hash_set
<tree_ssa_name_hash
> ssa_names
;
2487 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2489 gimple_stmt_iterator gsi
;
2490 basic_block bb
= ifc_bbs
[i
];
2491 tree cond
= bb_predicate (bb
);
2495 if (is_true_predicate (cond
))
2499 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
2502 cond
= TREE_OPERAND (cond
, 0);
2505 vect_sizes
.truncate (0);
2506 vect_masks
.truncate (0);
2508 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
2510 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
2514 else if (is_false_predicate (cond
)
2515 && gimple_vdef (stmt
))
2517 unlink_stmt_vdef (stmt
);
2518 gsi_remove (&gsi
, true);
2519 release_defs (stmt
);
2522 else if (gimple_plf (stmt
, GF_PLF_2
))
2524 tree lhs
= gimple_assign_lhs (stmt
);
2527 gimple_seq stmts
= NULL
;
2528 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
2529 /* We checked before setting GF_PLF_2 that an equivalent
2530 integer mode exists. */
2531 int bitsize
= GET_MODE_BITSIZE (mode
).to_constant ();
2532 if (!vect_sizes
.is_empty ()
2533 && (index
= mask_exists (bitsize
, vect_sizes
)) != -1)
2534 /* Use created mask. */
2535 mask
= vect_masks
[index
];
2538 if (COMPARISON_CLASS_P (cond
))
2539 mask
= gimple_build (&stmts
, TREE_CODE (cond
),
2541 TREE_OPERAND (cond
, 0),
2542 TREE_OPERAND (cond
, 1));
2549 = constant_boolean_node (true, TREE_TYPE (mask
));
2550 mask
= gimple_build (&stmts
, BIT_XOR_EXPR
,
2551 TREE_TYPE (mask
), mask
, true_val
);
2553 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2555 /* Save mask and its size for further use. */
2556 vect_sizes
.safe_push (bitsize
);
2557 vect_masks
.safe_push (mask
);
2559 if (gimple_assign_single_p (stmt
))
2560 new_stmt
= predicate_load_or_store (&gsi
, stmt
, mask
);
2562 new_stmt
= predicate_rhs_code (stmt
, mask
, cond
, &ssa_names
);
2564 gsi_replace (&gsi
, new_stmt
, true);
2566 else if (((lhs
= gimple_assign_lhs (stmt
)), true)
2567 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2568 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2569 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
))
2570 && arith_code_with_undefined_signed_overflow
2571 (gimple_assign_rhs_code (stmt
)))
2573 gsi_remove (&gsi
, true);
2574 gimple_seq stmts
= rewrite_to_defined_overflow (stmt
);
2576 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
2579 gassign
*stmt2
= as_a
<gassign
*> (gsi_stmt (gsi2
));
2580 gsi_remove (&gsi2
, false);
2583 gsi_insert_before (&gsi
, stmt2
, GSI_NEW_STMT
);
2587 gsi_insert_after (&gsi
, stmt2
, GSI_NEW_STMT
);
2590 else if (gimple_vdef (stmt
))
2592 tree lhs
= gimple_assign_lhs (stmt
);
2593 tree rhs
= gimple_assign_rhs1 (stmt
);
2594 tree type
= TREE_TYPE (lhs
);
2596 lhs
= ifc_temp_var (type
, unshare_expr (lhs
), &gsi
);
2597 rhs
= ifc_temp_var (type
, unshare_expr (rhs
), &gsi
);
2599 std::swap (lhs
, rhs
);
2600 cond
= force_gimple_operand_gsi (&gsi
, unshare_expr (cond
), true,
2601 NULL_TREE
, true, GSI_SAME_STMT
);
2602 rhs
= fold_build_cond_expr (type
, unshare_expr (cond
), rhs
, lhs
);
2603 gimple_assign_set_rhs1 (stmt
, ifc_temp_var (type
, rhs
, &gsi
));
2606 lhs
= gimple_get_lhs (gsi_stmt (gsi
));
2607 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
2608 ssa_names
.add (lhs
);
2615 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2616 other than the exit and latch of the LOOP. Also resets the
2617 GIMPLE_DEBUG information. */
2620 remove_conditions_and_labels (loop_p loop
)
2622 gimple_stmt_iterator gsi
;
2625 for (i
= 0; i
< loop
->num_nodes
; i
++)
2627 basic_block bb
= ifc_bbs
[i
];
2629 if (bb_with_exit_edge_p (loop
, bb
)
2630 || bb
== loop
->latch
)
2633 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); )
2634 switch (gimple_code (gsi_stmt (gsi
)))
2638 gsi_remove (&gsi
, true);
2642 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2643 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
2645 gimple_debug_bind_reset_value (gsi_stmt (gsi
));
2646 update_stmt (gsi_stmt (gsi
));
2657 /* Combine all the basic blocks from LOOP into one or two super basic
2658 blocks. Replace PHI nodes with conditional modify expressions. */
2661 combine_blocks (class loop
*loop
)
2663 basic_block bb
, exit_bb
, merge_target_bb
;
2664 unsigned int orig_loop_num_nodes
= loop
->num_nodes
;
2669 remove_conditions_and_labels (loop
);
2670 insert_gimplified_predicates (loop
);
2671 predicate_all_scalar_phis (loop
);
2673 if (need_to_predicate
|| need_to_rewrite_undefined
)
2674 predicate_statements (loop
);
2676 /* Merge basic blocks. */
2678 bool *predicated
= XNEWVEC (bool, orig_loop_num_nodes
);
2679 for (i
= 0; i
< orig_loop_num_nodes
; i
++)
2682 predicated
[i
] = !is_true_predicate (bb_predicate (bb
));
2683 free_bb_predicate (bb
);
2684 if (bb_with_exit_edge_p (loop
, bb
))
2686 gcc_assert (exit_bb
== NULL
);
2690 gcc_assert (exit_bb
!= loop
->latch
);
2692 merge_target_bb
= loop
->header
;
2694 /* Get at the virtual def valid for uses starting at the first block
2695 we merge into the header. Without a virtual PHI the loop has the
2696 same virtual use on all stmts. */
2697 gphi
*vphi
= get_virtual_phi (loop
->header
);
2698 tree last_vdef
= NULL_TREE
;
2701 last_vdef
= gimple_phi_result (vphi
);
2702 for (gimple_stmt_iterator gsi
= gsi_start_bb (loop
->header
);
2703 ! gsi_end_p (gsi
); gsi_next (&gsi
))
2704 if (gimple_vdef (gsi_stmt (gsi
)))
2705 last_vdef
= gimple_vdef (gsi_stmt (gsi
));
2707 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2709 gimple_stmt_iterator gsi
;
2710 gimple_stmt_iterator last
;
2714 if (bb
== exit_bb
|| bb
== loop
->latch
)
2717 /* We release virtual PHIs late because we have to propagate them
2718 out using the current VUSE. The def might be the one used
2720 vphi
= get_virtual_phi (bb
);
2723 /* When there's just loads inside the loop a stray virtual
2724 PHI merging the uses can appear, update last_vdef from
2727 last_vdef
= gimple_phi_arg_def (vphi
, 0);
2728 imm_use_iterator iter
;
2729 use_operand_p use_p
;
2731 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, gimple_phi_result (vphi
))
2733 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2734 SET_USE (use_p
, last_vdef
);
2736 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi
)))
2737 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef
) = 1;
2738 gsi
= gsi_for_stmt (vphi
);
2739 remove_phi_node (&gsi
, true);
2742 /* Make stmts member of loop->header and clear range info from all stmts
2743 in BB which is now no longer executed conditional on a predicate we
2744 could have derived it from. */
2745 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2747 gimple
*stmt
= gsi_stmt (gsi
);
2748 gimple_set_bb (stmt
, merge_target_bb
);
2749 /* Update virtual operands. */
2752 use_operand_p use_p
= ssa_vuse_operand (stmt
);
2754 && USE_FROM_PTR (use_p
) != last_vdef
)
2755 SET_USE (use_p
, last_vdef
);
2756 if (gimple_vdef (stmt
))
2757 last_vdef
= gimple_vdef (stmt
);
2760 /* If this is the first load we arrive at update last_vdef
2761 so we handle stray PHIs correctly. */
2762 last_vdef
= gimple_vuse (stmt
);
2767 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
)
2768 reset_flow_sensitive_info (op
);
2772 /* Update stmt list. */
2773 last
= gsi_last_bb (merge_target_bb
);
2774 gsi_insert_seq_after_without_update (&last
, bb_seq (bb
), GSI_NEW_STMT
);
2775 set_bb_seq (bb
, NULL
);
2778 /* Fixup virtual operands in the exit block. */
2780 && exit_bb
!= loop
->header
)
2782 /* We release virtual PHIs late because we have to propagate them
2783 out using the current VUSE. The def might be the one used
2785 vphi
= get_virtual_phi (exit_bb
);
2788 /* When there's just loads inside the loop a stray virtual
2789 PHI merging the uses can appear, update last_vdef from
2792 last_vdef
= gimple_phi_arg_def (vphi
, 0);
2793 imm_use_iterator iter
;
2794 use_operand_p use_p
;
2796 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, gimple_phi_result (vphi
))
2798 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2799 SET_USE (use_p
, last_vdef
);
2801 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi
)))
2802 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef
) = 1;
2803 gimple_stmt_iterator gsi
= gsi_for_stmt (vphi
);
2804 remove_phi_node (&gsi
, true);
2808 /* Now remove all the edges in the loop, except for those from the exit
2809 block and delete the blocks we elided. */
2810 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2814 for (ei
= ei_start (bb
->preds
); (e
= ei_safe_edge (ei
));)
2816 if (e
->src
== exit_bb
)
2822 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2826 if (bb
== exit_bb
|| bb
== loop
->latch
)
2829 delete_basic_block (bb
);
2832 /* Re-connect the exit block. */
2833 if (exit_bb
!= NULL
)
2835 if (exit_bb
!= loop
->header
)
2837 /* Connect this node to loop header. */
2838 make_single_succ_edge (loop
->header
, exit_bb
, EDGE_FALLTHRU
);
2839 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, loop
->header
);
2842 /* Redirect non-exit edges to loop->latch. */
2843 FOR_EACH_EDGE (e
, ei
, exit_bb
->succs
)
2845 if (!loop_exit_edge_p (loop
, e
))
2846 redirect_edge_and_branch (e
, loop
->latch
);
2848 set_immediate_dominator (CDI_DOMINATORS
, loop
->latch
, exit_bb
);
2852 /* If the loop does not have an exit, reconnect header and latch. */
2853 make_edge (loop
->header
, loop
->latch
, EDGE_FALLTHRU
);
2854 set_immediate_dominator (CDI_DOMINATORS
, loop
->latch
, loop
->header
);
2857 /* If possible, merge loop header to the block with the exit edge.
2858 This reduces the number of basic blocks to two, to please the
2859 vectorizer that handles only loops with two nodes. */
2861 && exit_bb
!= loop
->header
)
2863 if (can_merge_blocks_p (loop
->header
, exit_bb
))
2864 merge_blocks (loop
->header
, exit_bb
);
2872 /* Version LOOP before if-converting it; the original loop
2873 will be if-converted, the new copy of the loop will not,
2874 and the LOOP_VECTORIZED internal call will be guarding which
2875 loop to execute. The vectorizer pass will fold this
2876 internal call into either true or false.
2878 Note that this function intentionally invalidates profile. Both edges
2879 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2880 consistent after the condition is folded in the vectorizer. */
2883 version_loop_for_if_conversion (class loop
*loop
, vec
<gimple
*> *preds
)
2885 basic_block cond_bb
;
2886 tree cond
= make_ssa_name (boolean_type_node
);
2887 class loop
*new_loop
;
2889 gimple_stmt_iterator gsi
;
2890 unsigned int save_length
= 0;
2892 g
= gimple_build_call_internal (IFN_LOOP_VECTORIZED
, 2,
2893 build_int_cst (integer_type_node
, loop
->num
),
2895 gimple_call_set_lhs (g
, cond
);
2897 void **saved_preds
= NULL
;
2898 if (any_complicated_phi
|| need_to_predicate
)
2900 /* Save BB->aux around loop_version as that uses the same field. */
2901 save_length
= loop
->inner
? loop
->inner
->num_nodes
: loop
->num_nodes
;
2902 saved_preds
= XALLOCAVEC (void *, save_length
);
2903 for (unsigned i
= 0; i
< save_length
; i
++)
2904 saved_preds
[i
] = ifc_bbs
[i
]->aux
;
2907 initialize_original_copy_tables ();
2908 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2909 is re-merged in the vectorizer. */
2910 new_loop
= loop_version (loop
, cond
, &cond_bb
,
2911 profile_probability::always (),
2912 profile_probability::always (),
2913 profile_probability::always (),
2914 profile_probability::always (), true);
2915 free_original_copy_tables ();
2917 if (any_complicated_phi
|| need_to_predicate
)
2918 for (unsigned i
= 0; i
< save_length
; i
++)
2919 ifc_bbs
[i
]->aux
= saved_preds
[i
];
2921 if (new_loop
== NULL
)
2924 new_loop
->dont_vectorize
= true;
2925 new_loop
->force_vectorize
= false;
2926 gsi
= gsi_last_bb (cond_bb
);
2927 gimple_call_set_arg (g
, 1, build_int_cst (integer_type_node
, new_loop
->num
));
2929 preds
->safe_push (g
);
2930 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2931 update_ssa (TODO_update_ssa_no_phi
);
2935 /* Return true when LOOP satisfies the follow conditions that will
2936 allow it to be recognized by the vectorizer for outer-loop
2938 - The loop is not the root node of the loop tree.
2939 - The loop has exactly one inner loop.
2940 - The loop has a single exit.
2941 - The loop header has a single successor, which is the inner
2943 - Each of the inner and outer loop latches have a single
2945 - The loop exit block has a single predecessor, which is the
2946 inner loop's exit block. */
2949 versionable_outer_loop_p (class loop
*loop
)
2951 if (!loop_outer (loop
)
2952 || loop
->dont_vectorize
2954 || loop
->inner
->next
2955 || !single_exit (loop
)
2956 || !single_succ_p (loop
->header
)
2957 || single_succ (loop
->header
) != loop
->inner
->header
2958 || !single_pred_p (loop
->latch
)
2959 || !single_pred_p (loop
->inner
->latch
))
2962 basic_block outer_exit
= single_pred (loop
->latch
);
2963 basic_block inner_exit
= single_pred (loop
->inner
->latch
);
2965 if (!single_pred_p (outer_exit
) || single_pred (outer_exit
) != inner_exit
)
2969 fprintf (dump_file
, "Found vectorizable outer loop for versioning\n");
2974 /* Performs splitting of critical edges. Skip splitting and return false
2975 if LOOP will not be converted because:
2977 - LOOP is not well formed.
2978 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2980 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
2983 ifcvt_split_critical_edges (class loop
*loop
, bool aggressive_if_conv
)
2987 unsigned int num
= loop
->num_nodes
;
2992 auto_vec
<edge
> critical_edges
;
2994 /* Loop is not well formed. */
2998 body
= get_loop_body (loop
);
2999 for (i
= 0; i
< num
; i
++)
3002 if (!aggressive_if_conv
3004 && EDGE_COUNT (bb
->preds
) > MAX_PHI_ARG_NUM
)
3006 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3008 "BB %d has complicated PHI with more than %u args.\n",
3009 bb
->index
, MAX_PHI_ARG_NUM
);
3014 if (bb
== loop
->latch
|| bb_with_exit_edge_p (loop
, bb
))
3017 stmt
= last_stmt (bb
);
3018 /* Skip basic blocks not ending with conditional branch. */
3019 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
3022 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3023 if (EDGE_CRITICAL_P (e
) && e
->dest
->loop_father
== loop
)
3024 critical_edges
.safe_push (e
);
3028 while (critical_edges
.length () > 0)
3030 e
= critical_edges
.pop ();
3031 /* Don't split if bb can be predicated along non-critical edge. */
3032 if (EDGE_COUNT (e
->dest
->preds
) > 2 || all_preds_critical_p (e
->dest
))
3039 /* Delete redundant statements produced by predication which prevents
3040 loop vectorization. */
3043 ifcvt_local_dce (class loop
*loop
)
3048 gimple_stmt_iterator gsi
;
3049 auto_vec
<gimple
*> worklist
;
3050 enum gimple_code code
;
3051 use_operand_p use_p
;
3052 imm_use_iterator imm_iter
;
3054 /* The loop has a single BB only. */
3055 basic_block bb
= loop
->header
;
3056 tree latch_vdef
= NULL_TREE
;
3058 worklist
.create (64);
3059 /* Consider all phi as live statements. */
3060 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3062 phi
= gsi_stmt (gsi
);
3063 gimple_set_plf (phi
, GF_PLF_2
, true);
3064 worklist
.safe_push (phi
);
3065 if (virtual_operand_p (gimple_phi_result (phi
)))
3066 latch_vdef
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
3068 /* Consider load/store statements, CALL and COND as live. */
3069 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3071 stmt
= gsi_stmt (gsi
);
3072 if (is_gimple_debug (stmt
))
3074 gimple_set_plf (stmt
, GF_PLF_2
, true);
3077 if (gimple_store_p (stmt
) || gimple_assign_load_p (stmt
))
3079 gimple_set_plf (stmt
, GF_PLF_2
, true);
3080 worklist
.safe_push (stmt
);
3083 code
= gimple_code (stmt
);
3084 if (code
== GIMPLE_COND
|| code
== GIMPLE_CALL
)
3086 gimple_set_plf (stmt
, GF_PLF_2
, true);
3087 worklist
.safe_push (stmt
);
3090 gimple_set_plf (stmt
, GF_PLF_2
, false);
3092 if (code
== GIMPLE_ASSIGN
)
3094 tree lhs
= gimple_assign_lhs (stmt
);
3095 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
3097 stmt1
= USE_STMT (use_p
);
3098 if (!is_gimple_debug (stmt1
) && gimple_bb (stmt1
) != bb
)
3100 gimple_set_plf (stmt
, GF_PLF_2
, true);
3101 worklist
.safe_push (stmt
);
3107 /* Propagate liveness through arguments of live stmt. */
3108 while (worklist
.length () > 0)
3111 use_operand_p use_p
;
3114 stmt
= worklist
.pop ();
3115 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
3117 use
= USE_FROM_PTR (use_p
);
3118 if (TREE_CODE (use
) != SSA_NAME
)
3120 stmt1
= SSA_NAME_DEF_STMT (use
);
3121 if (gimple_bb (stmt1
) != bb
|| gimple_plf (stmt1
, GF_PLF_2
))
3123 gimple_set_plf (stmt1
, GF_PLF_2
, true);
3124 worklist
.safe_push (stmt1
);
3127 /* Delete dead statements. */
3128 gsi
= gsi_last_bb (bb
);
3129 while (!gsi_end_p (gsi
))
3131 gimple_stmt_iterator gsiprev
= gsi
;
3132 gsi_prev (&gsiprev
);
3133 stmt
= gsi_stmt (gsi
);
3134 if (gimple_store_p (stmt
) && gimple_vdef (stmt
))
3136 tree lhs
= gimple_get_lhs (stmt
);
3138 ao_ref_init (&write
, lhs
);
3140 if (dse_classify_store (&write
, stmt
, false, NULL
, NULL
, latch_vdef
)
3142 delete_dead_or_redundant_assignment (&gsi
, "dead");
3147 if (gimple_plf (stmt
, GF_PLF_2
))
3152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3154 fprintf (dump_file
, "Delete dead stmt in bb#%d\n", bb
->index
);
3155 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3157 gsi_remove (&gsi
, true);
3158 release_defs (stmt
);
3163 /* Return true if VALUE is already available on edge PE. */
3166 ifcvt_available_on_edge_p (edge pe
, tree value
)
3168 if (is_gimple_min_invariant (value
))
3171 if (TREE_CODE (value
) == SSA_NAME
)
3173 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (value
));
3174 if (!def_bb
|| dominated_by_p (CDI_DOMINATORS
, pe
->dest
, def_bb
))
3181 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3185 ifcvt_can_hoist (class loop
*loop
, edge pe
, gimple
*stmt
)
3187 if (auto *call
= dyn_cast
<gcall
*> (stmt
))
3189 if (gimple_call_internal_p (call
)
3190 && internal_fn_mask_index (gimple_call_internal_fn (call
)) >= 0)
3193 else if (auto *assign
= dyn_cast
<gassign
*> (stmt
))
3195 if (gimple_assign_rhs_code (assign
) == COND_EXPR
)
3201 if (gimple_has_side_effects (stmt
)
3202 || gimple_could_trap_p (stmt
)
3203 || stmt_could_throw_p (cfun
, stmt
)
3204 || gimple_vdef (stmt
)
3205 || gimple_vuse (stmt
))
3208 int num_args
= gimple_num_args (stmt
);
3209 if (pe
!= loop_preheader_edge (loop
))
3211 for (int i
= 0; i
< num_args
; ++i
)
3212 if (!ifcvt_available_on_edge_p (pe
, gimple_arg (stmt
, i
)))
3217 for (int i
= 0; i
< num_args
; ++i
)
3218 if (!expr_invariant_in_loop_p (loop
, gimple_arg (stmt
, i
)))
3225 /* Hoist invariant statements from LOOP to edge PE. */
3228 ifcvt_hoist_invariants (class loop
*loop
, edge pe
)
3230 gimple_stmt_iterator hoist_gsi
= {};
3231 unsigned int num_blocks
= loop
->num_nodes
;
3232 basic_block
*body
= get_loop_body (loop
);
3233 for (unsigned int i
= 0; i
< num_blocks
; ++i
)
3234 for (gimple_stmt_iterator gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);)
3236 gimple
*stmt
= gsi_stmt (gsi
);
3237 if (ifcvt_can_hoist (loop
, pe
, stmt
))
3239 /* Once we've hoisted one statement, insert other statements
3241 gsi_remove (&gsi
, false);
3243 gsi_insert_after (&hoist_gsi
, stmt
, GSI_NEW_STMT
);
3246 gsi_insert_on_edge_immediate (pe
, stmt
);
3247 hoist_gsi
= gsi_for_stmt (stmt
);
3256 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3257 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3258 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3259 if not NULL, will hold the tree representing the base struct of this
3263 get_bitfield_rep (gassign
*stmt
, bool write
, tree
*bitpos
,
3266 tree comp_ref
= write
? gimple_assign_lhs (stmt
)
3267 : gimple_assign_rhs1 (stmt
);
3269 tree field_decl
= TREE_OPERAND (comp_ref
, 1);
3270 tree rep_decl
= DECL_BIT_FIELD_REPRESENTATIVE (field_decl
);
3272 /* Bail out if the representative is BLKmode as we will not be able to
3274 if (TYPE_MODE (TREE_TYPE (rep_decl
)) == E_BLKmode
)
3277 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3279 unsigned HOST_WIDE_INT bf_prec
3280 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt
)));
3281 if (compare_tree_int (DECL_SIZE (field_decl
), bf_prec
) != 0)
3285 *struct_expr
= TREE_OPERAND (comp_ref
, 0);
3289 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3290 where our bitfield starts in relation to the container REP_DECL. The
3291 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3292 us how many bytes from the start of the structure there are until the
3293 start of the group of bitfield members the FIELD_DECL belongs to,
3294 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3295 position our actual bitfield member starts. For the container
3296 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3297 us the distance between the start of the structure and the start of
3298 the container, though the first is in bytes and the later other in
3299 bits. With this in mind we calculate the bit position of our new
3300 BITFIELD_REF by subtracting the number of bits between the start of
3301 the structure and the container from the number of bits from the start
3302 of the structure and the actual bitfield member. */
3303 tree bf_pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
3304 DECL_FIELD_OFFSET (field_decl
),
3305 build_int_cst (bitsizetype
, BITS_PER_UNIT
));
3306 bf_pos
= fold_build2 (PLUS_EXPR
, bitsizetype
, bf_pos
,
3307 DECL_FIELD_BIT_OFFSET (field_decl
));
3308 tree rep_pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
3309 DECL_FIELD_OFFSET (rep_decl
),
3310 build_int_cst (bitsizetype
, BITS_PER_UNIT
));
3311 rep_pos
= fold_build2 (PLUS_EXPR
, bitsizetype
, rep_pos
,
3312 DECL_FIELD_BIT_OFFSET (rep_decl
));
3314 *bitpos
= fold_build2 (MINUS_EXPR
, bitsizetype
, bf_pos
, rep_pos
);
3321 /* Lowers the bitfield described by DATA.
3328 __ifc_1 = struct.<representative>;
3329 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3330 struct.<representative> = __ifc_2;
3338 __ifc_1 = struct.<representative>;
3339 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3341 where representative is a legal load that contains the bitfield value,
3342 bitsize is the size of the bitfield and bitpos the offset to the start of
3343 the bitfield within the representative. */
3346 lower_bitfield (gassign
*stmt
, bool write
)
3350 tree rep_decl
= get_bitfield_rep (stmt
, write
, &bitpos
, &struct_expr
);
3351 tree rep_type
= TREE_TYPE (rep_decl
);
3352 tree bf_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
3354 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3355 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3357 fprintf (dump_file
, "Lowering:\n");
3358 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3359 fprintf (dump_file
, "to:\n");
3362 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3363 defining SSA_NAME. */
3364 tree rep_comp_ref
= build3 (COMPONENT_REF
, rep_type
, struct_expr
, rep_decl
,
3366 tree new_val
= ifc_temp_var (rep_type
, rep_comp_ref
, &gsi
);
3368 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3369 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (new_val
), 0, TDF_SLIM
);
3373 new_val
= ifc_temp_var (rep_type
,
3374 build3 (BIT_INSERT_EXPR
, rep_type
, new_val
,
3375 unshare_expr (gimple_assign_rhs1 (stmt
)),
3378 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3379 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (new_val
), 0, TDF_SLIM
);
3381 gimple
*new_stmt
= gimple_build_assign (unshare_expr (rep_comp_ref
),
3383 gimple_move_vops (new_stmt
, stmt
);
3384 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
3386 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3387 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
3391 tree bfr
= build3 (BIT_FIELD_REF
, bf_type
, new_val
,
3392 build_int_cst (bitsizetype
, TYPE_PRECISION (bf_type
)),
3394 new_val
= ifc_temp_var (bf_type
, bfr
, &gsi
);
3396 gimple
*new_stmt
= gimple_build_assign (gimple_assign_lhs (stmt
),
3398 gimple_move_vops (new_stmt
, stmt
);
3399 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
3401 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3402 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
3405 gsi_remove (&gsi
, true);
3408 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3409 with data structures representing these bitfields. */
3412 bitfields_to_lower_p (class loop
*loop
,
3413 vec
<gassign
*> &reads_to_lower
,
3414 vec
<gassign
*> &writes_to_lower
)
3416 gimple_stmt_iterator gsi
;
3418 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3420 fprintf (dump_file
, "Analyzing loop %d for bitfields:\n", loop
->num
);
3423 for (unsigned i
= 0; i
< loop
->num_nodes
; ++i
)
3425 basic_block bb
= ifc_bbs
[i
];
3426 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3428 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
3432 tree op
= gimple_assign_lhs (stmt
);
3433 bool write
= TREE_CODE (op
) == COMPONENT_REF
;
3436 op
= gimple_assign_rhs1 (stmt
);
3438 if (TREE_CODE (op
) != COMPONENT_REF
)
3441 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op
, 1)))
3443 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3444 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3446 if (!INTEGRAL_TYPE_P (TREE_TYPE (op
)))
3448 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3449 fprintf (dump_file
, "\t Bitfield NO OK to lower,"
3450 " field type is not Integral.\n");
3454 if (!get_bitfield_rep (stmt
, write
, NULL
, NULL
))
3456 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3457 fprintf (dump_file
, "\t Bitfield NOT OK to lower,"
3458 " representative is BLKmode.\n");
3462 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3463 fprintf (dump_file
, "\tBitfield OK to lower.\n");
3465 writes_to_lower
.safe_push (stmt
);
3467 reads_to_lower
.safe_push (stmt
);
3471 return !reads_to_lower
.is_empty () || !writes_to_lower
.is_empty ();
3475 /* If-convert LOOP when it is legal. For the moment this pass has no
3476 profitability analysis. Returns non-zero todo flags when something
3480 tree_if_conversion (class loop
*loop
, vec
<gimple
*> *preds
)
3482 unsigned int todo
= 0;
3483 bool aggressive_if_conv
;
3485 auto_vec
<gassign
*, 4> reads_to_lower
;
3486 auto_vec
<gassign
*, 4> writes_to_lower
;
3489 vec
<data_reference_p
> refs
;
3494 need_to_lower_bitfields
= false;
3495 need_to_ifcvt
= false;
3496 need_to_predicate
= false;
3497 need_to_rewrite_undefined
= false;
3498 any_complicated_phi
= false;
3501 /* Apply more aggressive if-conversion when loop or its outer loop were
3502 marked with simd pragma. When that's the case, we try to if-convert
3503 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3504 aggressive_if_conv
= loop
->force_vectorize
;
3505 if (!aggressive_if_conv
)
3507 class loop
*outer_loop
= loop_outer (loop
);
3508 if (outer_loop
&& outer_loop
->force_vectorize
)
3509 aggressive_if_conv
= true;
3512 if (!single_exit (loop
))
3515 /* If there are more than two BBs in the loop then there is at least one if
3517 if (loop
->num_nodes
> 2
3518 && !ifcvt_split_critical_edges (loop
, aggressive_if_conv
))
3521 ifc_bbs
= get_loop_body_in_if_conv_order (loop
);
3524 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3525 fprintf (dump_file
, "Irreducible loop\n");
3529 if (find_data_references_in_loop (loop
, &refs
) == chrec_dont_know
)
3532 if (loop
->num_nodes
> 2)
3534 need_to_ifcvt
= true;
3536 if (!if_convertible_loop_p (loop
, &refs
) || !dbg_cnt (if_conversion_tree
))
3539 if ((need_to_predicate
|| any_complicated_phi
)
3540 && ((!flag_tree_loop_vectorize
&& !loop
->force_vectorize
)
3541 || loop
->dont_vectorize
))
3545 if ((flag_tree_loop_vectorize
|| loop
->force_vectorize
)
3546 && !loop
->dont_vectorize
)
3547 need_to_lower_bitfields
= bitfields_to_lower_p (loop
, reads_to_lower
,
3550 if (!need_to_ifcvt
&& !need_to_lower_bitfields
)
3553 /* The edge to insert invariant stmts on. */
3554 pe
= loop_preheader_edge (loop
);
3556 /* Since we have no cost model, always version loops unless the user
3557 specified -ftree-loop-if-convert or unless versioning is required.
3558 Either version this loop, or if the pattern is right for outer-loop
3559 vectorization, version the outer loop. In the latter case we will
3560 still if-convert the original inner loop. */
3561 if (need_to_lower_bitfields
3562 || need_to_predicate
3563 || any_complicated_phi
3564 || flag_tree_loop_if_convert
!= 1)
3567 = (versionable_outer_loop_p (loop_outer (loop
))
3568 ? loop_outer (loop
) : loop
);
3569 class loop
*nloop
= version_loop_for_if_conversion (vloop
, preds
);
3574 /* If versionable_outer_loop_p decided to version the
3575 outer loop, version also the inner loop of the non-vectorized
3576 loop copy. So we transform:
3580 if (LOOP_VECTORIZED (1, 3))
3586 loop3 (copy of loop1)
3587 if (LOOP_VECTORIZED (4, 5))
3588 loop4 (copy of loop2)
3590 loop5 (copy of loop4) */
3591 gcc_assert (nloop
->inner
&& nloop
->inner
->next
== NULL
);
3592 rloop
= nloop
->inner
;
3595 /* If we versioned loop then make sure to insert invariant
3596 stmts before the .LOOP_VECTORIZED check since the vectorizer
3597 will re-use that for things like runtime alias versioning
3598 whose condition can end up using those invariants. */
3599 pe
= single_pred_edge (gimple_bb (preds
->last ()));
3602 if (need_to_lower_bitfields
)
3604 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3606 fprintf (dump_file
, "-------------------------\n");
3607 fprintf (dump_file
, "Start lowering bitfields\n");
3609 while (!reads_to_lower
.is_empty ())
3610 lower_bitfield (reads_to_lower
.pop (), false);
3611 while (!writes_to_lower
.is_empty ())
3612 lower_bitfield (writes_to_lower
.pop (), true);
3614 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3616 fprintf (dump_file
, "Done lowering bitfields\n");
3617 fprintf (dump_file
, "-------------------------\n");
3622 /* Now all statements are if-convertible. Combine all the basic
3623 blocks into one huge basic block doing the if-conversion
3625 combine_blocks (loop
);
3628 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3629 and stores are involved. CSE only the loop body, not the entry
3630 PHIs, those are to be kept in sync with the non-if-converted copy.
3631 ??? We'll still keep dead stores though. */
3632 exit_bbs
= BITMAP_ALLOC (NULL
);
3633 bitmap_set_bit (exit_bbs
, single_exit (loop
)->dest
->index
);
3634 bitmap_set_bit (exit_bbs
, loop
->latch
->index
);
3636 std::pair
<tree
, tree
> *name_pair
;
3637 unsigned ssa_names_idx
;
3638 FOR_EACH_VEC_ELT (redundant_ssa_names
, ssa_names_idx
, name_pair
)
3639 replace_uses_by (name_pair
->first
, name_pair
->second
);
3640 redundant_ssa_names
.release ();
3642 todo
|= do_rpo_vn (cfun
, loop_preheader_edge (loop
), exit_bbs
);
3644 /* Delete dead predicate computations. */
3645 ifcvt_local_dce (loop
);
3646 BITMAP_FREE (exit_bbs
);
3648 ifcvt_hoist_invariants (loop
, pe
);
3650 todo
|= TODO_cleanup_cfg
;
3653 data_reference_p dr
;
3655 for (i
= 0; refs
.iterate (i
, &dr
); i
++)
3664 for (i
= 0; i
< loop
->num_nodes
; i
++)
3665 free_bb_predicate (ifc_bbs
[i
]);
3673 reads_to_lower
.truncate (0);
3674 writes_to_lower
.truncate (0);
3681 /* Tree if-conversion pass management. */
3685 const pass_data pass_data_if_conversion
=
3687 GIMPLE_PASS
, /* type */
3689 OPTGROUP_NONE
, /* optinfo_flags */
3690 TV_TREE_LOOP_IFCVT
, /* tv_id */
3691 ( PROP_cfg
| PROP_ssa
), /* properties_required */
3692 0, /* properties_provided */
3693 0, /* properties_destroyed */
3694 0, /* todo_flags_start */
3695 0, /* todo_flags_finish */
3698 class pass_if_conversion
: public gimple_opt_pass
3701 pass_if_conversion (gcc::context
*ctxt
)
3702 : gimple_opt_pass (pass_data_if_conversion
, ctxt
)
3705 /* opt_pass methods: */
3706 bool gate (function
*) final override
;
3707 unsigned int execute (function
*) final override
;
3709 }; // class pass_if_conversion
3712 pass_if_conversion::gate (function
*fun
)
3714 return (((flag_tree_loop_vectorize
|| fun
->has_force_vectorize_loops
)
3715 && flag_tree_loop_if_convert
!= 0)
3716 || flag_tree_loop_if_convert
== 1);
3720 pass_if_conversion::execute (function
*fun
)
3724 if (number_of_loops (fun
) <= 1)
3727 auto_vec
<gimple
*> preds
;
3728 for (auto loop
: loops_list (cfun
, 0))
3729 if (flag_tree_loop_if_convert
== 1
3730 || ((flag_tree_loop_vectorize
|| loop
->force_vectorize
)
3731 && !loop
->dont_vectorize
))
3732 todo
|= tree_if_conversion (loop
, &preds
);
3736 free_numbers_of_iterations_estimates (fun
);
3743 FOR_EACH_BB_FN (bb
, fun
)
3744 gcc_assert (!bb
->aux
);
3747 /* Perform IL update now, it might elide some loops. */
3748 if (todo
& TODO_cleanup_cfg
)
3750 cleanup_tree_cfg ();
3751 if (need_ssa_update_p (fun
))
3752 todo
|= TODO_update_ssa
;
3754 if (todo
& TODO_update_ssa_any
)
3755 update_ssa (todo
& TODO_update_ssa_any
);
3757 /* If if-conversion elided the loop fall back to the original one. */
3758 for (unsigned i
= 0; i
< preds
.length (); ++i
)
3760 gimple
*g
= preds
[i
];
3763 unsigned ifcvt_loop
= tree_to_uhwi (gimple_call_arg (g
, 0));
3764 unsigned orig_loop
= tree_to_uhwi (gimple_call_arg (g
, 1));
3765 if (!get_loop (fun
, ifcvt_loop
) || !get_loop (fun
, orig_loop
))
3768 fprintf (dump_file
, "If-converted loop vanished\n");
3769 fold_loop_internal_call (g
, boolean_false_node
);
3779 make_pass_if_conversion (gcc::context
*ctxt
)
3781 return new pass_if_conversion (ctxt
);