/* If-conversion for vectorizer.
   Copyright (C) 2004-2018 Free Software Foundation, Inc.
   Contributed by Devang Patel <dpatel@apple.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass implements a tree level if-conversion of loops.  Its
   initial goal is to help the vectorizer to vectorize loops with
   conditions.

   A short description of if-conversion:

     o Decide if a loop is if-convertible or not.
     o Walk all loop basic blocks in breadth first order (BFS order).
       o Remove conditional statements (at the end of basic block)
         and propagate condition into destination basic blocks'
         predicate list.
       o Replace modify expression with conditional modify expression
         using current basic block's condition.
     o Merge all basic blocks
       o Replace phi nodes with conditional modify expr
       o Merge all basic blocks into header
   Sample transformation:

     INPUT
     -----

     # i_23 = PHI <0(0), i_18(10)>;
     <L0>:;
     j_15 = A[i_23];
     if (j_15 > 41) goto <L1>; else goto <L17>;

     <L17>:;
     goto <bb 3> (<L3>);

     <L1>:;

     # iftmp.2_4 = PHI <0(8), 42(2)>;
     <L3>:;
     A[i_23] = iftmp.2_4;
     i_18 = i_23 + 1;
     if (i_18 <= 15) goto <L19>; else goto <L18>;

     <L19>:;
     goto <bb 1> (<L0>);

     <L18>:;

     OUTPUT
     ------

     # i_23 = PHI <0(0), i_18(10)>;
     <L0>:;
     j_15 = A[i_23];

     <L3>:;
     iftmp.2_4 = j_15 > 41 ? 42 : 0;
     A[i_23] = iftmp.2_4;
     i_18 = i_23 + 1;
     if (i_18 <= 15) goto <L19>; else goto <L18>;

     <L19>:;
     goto <bb 1> (<L0>);

     <L18>:;  */
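
/* For illustration (not part of the original sources): at the source level
   the transformation above roughly corresponds to rewriting

     for (i = 0; i <= 15; i++)
       if (A[i] > 41)
         A[i] = 42;
       else
         A[i] = 0;

   into the branch-free body

     for (i = 0; i <= 15; i++)
       A[i] = A[i] > 41 ? 42 : 0;

   which is what makes the loop amenable to vectorization.  */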
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-query.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-address.h"
#include "tree-hash-traits.h"
#include "builtins.h"
#include "params.h"
#include "internal-fn.h"
/* Only handle PHIs with no more arguments unless we are asked to by
   simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;
/* Hash for struct innermost_loop_behavior.  It depends on the user to
   free the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &,
                            const compare_type &);
};

inline hashval_t
innermost_loop_behavior_hash::hash (const value_type &e)
{
  hashval_t hash;

  hash = iterative_hash_expr (e->base_address, 0);
  hash = iterative_hash_expr (e->offset, hash);
  hash = iterative_hash_expr (e->init, hash);
  return iterative_hash_expr (e->step, hash);
}

inline bool
innermost_loop_behavior_hash::equal (const value_type &e1,
                                     const compare_type &e2)
{
  if ((e1->base_address && !e2->base_address)
      || (!e1->base_address && e2->base_address)
      || (!e1->offset && e2->offset)
      || (e1->offset && !e2->offset)
      || (!e1->init && e2->init)
      || (e1->init && !e2->init)
      || (!e1->step && e2->step)
      || (e1->step && !e2->step))
    return false;

  if (e1->base_address && e2->base_address
      && !operand_equal_p (e1->base_address, e2->base_address, 0))
    return false;
  if (e1->offset && e2->offset
      && !operand_equal_p (e1->offset, e2->offset, 0))
    return false;
  if (e1->init && e2->init
      && !operand_equal_p (e1->init, e2->init, 0))
    return false;
  if (e1->step && e2->step
      && !operand_equal_p (e1->step, e2->step, 0))
    return false;

  return true;
}
/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
                data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;
/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */
struct bb_predicate
{
  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;
};
/* Returns true when the basic block BB has a predicate.  */

static inline bool
bb_has_predicate (basic_block bb)
{
  return bb->aux != NULL;
}

/* Returns the gimplified predicate for basic block BB.  */

static inline tree
bb_predicate (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate;
}

/* Sets the gimplified predicate COND for basic block BB.  */

static inline void
set_bb_predicate (basic_block bb, tree cond)
{
  gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
               && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
              || is_gimple_condexpr (cond));
  ((struct bb_predicate *) bb->aux)->predicate = cond;
}

/* Returns the sequence of statements of the gimplification of the
   predicate for basic block BB.  */

static inline gimple_seq
bb_predicate_gimplified_stmts (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
}

/* Sets the sequence of statements STMTS of the gimplification of the
   predicate for basic block BB.  */

static inline void
set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
}

/* Adds the sequence of statements STMTS to the sequence of statements
   of the predicate for basic block BB.  */

static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ??? This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (stmts);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      delink_stmt_imm_use (stmt);
      gimple_set_modified (stmt, true);
    }
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}

/* Initializes to TRUE the predicate of basic block BB.  */

static inline void
init_bb_predicate (basic_block bb)
{
  bb->aux = XNEW (struct bb_predicate);
  set_bb_predicate_gimplified_stmts (bb, NULL);
  set_bb_predicate (bb, boolean_true_node);
}
/* Release the SSA_NAMEs associated with the predicate of basic block BB.  */

static void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.  */
      if (flag_checking)
        for (gimple_stmt_iterator i = gsi_start (stmts);
             !gsi_end_p (i); gsi_next (&i))
          gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL);
    }
}

/* Free the predicate of basic block BB.  */

static void
free_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    return;

  release_bb_predicate (bb);
  free (bb->aux);
  bb->aux = NULL;
}

/* Reinitialize predicate of BB with the true predicate.  */

static inline void
reset_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    init_bb_predicate (bb);
  else
    {
      release_bb_predicate (bb);
      set_bb_predicate (bb, boolean_true_node);
    }
}

/* Returns a new SSA_NAME of type TYPE that is assigned the value of
   the expression EXPR.  Inserts the statement created for this
   computation before GSI and leaves the iterator GSI at the same
   statement.  */

static tree
ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
{
  tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
  gimple *stmt = gimple_build_assign (new_name, expr);
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return new_name;
}

/* Return true when COND is a false predicate.  */

static inline bool
is_false_predicate (tree cond)
{
  return (cond != NULL_TREE
          && (cond == boolean_false_node
              || integer_zerop (cond)));
}

/* Return true when COND is a true predicate.  */

static inline bool
is_true_predicate (tree cond)
{
  return (cond == NULL_TREE
          || cond == boolean_true_node
          || integer_onep (cond));
}

/* Returns true when BB has a predicate that is not trivial: true or
   NULL_TREE.  */

static inline bool
is_predicated (basic_block bb)
{
  return !is_true_predicate (bb_predicate (bb));
}
/* Parses the predicate COND and returns its comparison code and
   operands OP0 and OP1.  */

static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
        {
          *op0 = gimple_assign_rhs1 (s);
          *op1 = gimple_assign_rhs2 (s);
          return gimple_assign_rhs_code (s);
        }
      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
        {
          tree op = gimple_assign_rhs1 (s);
          tree type = TREE_TYPE (op);
          enum tree_code code = parse_predicate (op, op0, op1);

          return code == ERROR_MARK ? ERROR_MARK
            : invert_tree_comparison (code, HONOR_NANS (type));
        }

      return ERROR_MARK;
    }

  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  return ERROR_MARK;
}
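
/* For illustration (not part of the original sources): given the GIMPLE

     _1 = a_5 < b_6;

   parse_predicate (_1, &op0, &op1) follows the SSA definition of _1 and
   returns LT_EXPR with OP0 = a_5 and OP1 = b_6; a predicate that is neither
   a comparison nor a negated comparison yields ERROR_MARK.  */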
/* Returns the fold of predicate C1 OR C2 at location LOC.  */

static tree
fold_or_predicates (location_t loc, tree c1, tree c2)
{
  tree op1a, op1b, op2a, op2b;
  enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
  enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);

  if (code1 != ERROR_MARK && code2 != ERROR_MARK)
    {
      tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
                                          code2, op2a, op2b);
      if (t)
        return t;
    }

  return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
}
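
/* For illustration (not part of the original sources): when the two
   predicates are complementary comparisons, e.g. C1 = "x_1 < 10" and
   C2 = "x_1 >= 10", maybe_fold_or_comparisons can fold the disjunction to
   boolean_true_node, which later lets reset_bb_predicate drop the predicate
   entirely; otherwise the result is simply "C1 || C2".  */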
/* Returns either a COND_EXPR or the folded expression if the folded
   expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
   a constant or a SSA_NAME.  */

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  tree rhs1, lhs1, cond_expr;

  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
          && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
          && (integer_zerop (op1)))
        cond = op0;
    }
  cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);

  if (cond_expr == NULL_TREE)
    return build3 (COND_EXPR, type, cond, rhs, lhs);

  STRIP_USELESS_TYPE_CONVERSION (cond_expr);

  if (is_gimple_val (cond_expr))
    return cond_expr;

  if (TREE_CODE (cond_expr) == ABS_EXPR)
    {
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1))
        return build1 (ABS_EXPR, type, rhs1);
    }

  if (TREE_CODE (cond_expr) == MIN_EXPR
      || TREE_CODE (cond_expr) == MAX_EXPR)
    {
      lhs1 = TREE_OPERAND (cond_expr, 0);
      STRIP_USELESS_TYPE_CONVERSION (lhs1);
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
        return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
    }
  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
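
/* For illustration (not part of the original sources): folding the COND_EXPR
   first lets the vectorizer see simpler trees.  For instance

     fold_build_cond_expr (int, a_1 < b_2, a_1, b_2)

   can fold to "MIN_EXPR <a_1, b_2>" instead of the equivalent
   "a_1 < b_2 ? a_1 : b_2", and a fold that yields a constant or an SSA_NAME
   is returned directly.  */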
/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);

  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
        set_bb_predicate (bb, bc);
      else
        gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
                 dom_bb->index, bb->index);
      return;
    }

  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
        {
          reset_bb_predicate (bb);
          return;
        }
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  if (!is_gimple_condexpr (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}
/* Add the condition COND to the previous condition PREV_COND, and add
   this to the predicate list of the destination of edge E.  LOOP is
   the loop to be if-converted.  */

static void
add_to_dst_predicate_list (struct loop *loop, edge e,
                           tree prev_cond, tree cond)
{
  if (!flow_bb_inside_loop_p (loop, e->dest))
    return;

  if (!is_true_predicate (prev_cond))
    cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                        prev_cond, cond);

  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
    add_to_predicate_list (loop, e->dest, cond);
}
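
/* For illustration (not part of the original sources): if a block executed
   under predicate P ends in "if (c) goto T; else goto F;", the true edge
   contributes "P && c" to T's predicate list and the false edge contributes
   "P && !c" to F's, which is exactly how predicate_bbs below uses this
   helper.  */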
/* Return true if one of the successor edges of BB exits LOOP.  */

static bool
bb_with_exit_edge_p (struct loop *loop, basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (loop_exit_edge_p (loop, e))
      return true;

  return false;
}
/* Given PHI which has more than two arguments, this function checks if
   it's if-convertible by degenerating its arguments.  Specifically, if
   below two conditions are satisfied:

     1) Number of PHI arguments with different values equals to 2 and one
        argument has the only occurrence.
     2) The edge corresponding to the unique argument isn't critical edge.

   Such PHI can be handled as PHIs have only two arguments.  For example,
   below PHI:

     res = PHI <A_1(e1), A_1(e2), A_2(e3)>;

   can be transformed into:

     res = (predicate of e3) ? A_2 : A_1;

   Return TRUE if it is the case, FALSE otherwise.  */

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
        {
          n1++;
          i1 = i;
          t1 = arg;
        }
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
        {
          n2++;
          i2 = i;
          t2 = arg;
        }
      else
        return false;
    }

  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}

/* Return true when PHI is if-convertible.  PHI is part of loop LOOP
   and it belongs to basic block BB.  Note at this point, it is sure
   that PHI is if-convertible.  This function updates global variable
   ANY_COMPLICATED_PHI if PHI is complicated.  */

static bool
if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
    }

  if (bb != loop->header
      && gimple_phi_num_args (phi) > 2
      && !phi_convertible_by_degenerating_args (phi))
    any_complicated_phi = true;

  return true;
}

/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr
{
  bool rw_unconditionally;
  bool w_unconditionally;
  bool written_at_least_once;

  tree rw_predicate;
  tree w_predicate;
  tree base_w_predicate;
};

#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least once
   unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.  */

static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{
  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
        = fold_or_predicates (UNKNOWN_LOCATION, ca,
                              IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
        DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
                          IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  if (DR_IS_WRITE (a))
    {
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
        *base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
        = fold_or_predicates (UNKNOWN_LOCATION, ca,
                              IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
        DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
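
/* For illustration (not part of the original sources): in a loop body such
   as

     if (c)
       A[i] = x;    // predicate: c
     else
       t = A[i];    // predicate: !c

   both accesses share the same innermost behavior, so their predicates are
   OR-ed into "c || !c", which folds to true; the reference then becomes
   RW_UNCONDITIONALLY, i.e. A[i] is known to be touched on every
   iteration of the if-converted loop.  */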
/* Return TRUE if can prove the index IDX of an array reference REF is
   within array bound.  Return false otherwise.  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  struct loop *loop = (struct loop *) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays at the end of the structure, we are not guaranteed that they
     do not really extend over their declared size.  However, for arrays of
     size greater than one, this is unlikely to be intended.  */
  if (array_at_struct_end_p (ref))
    return false;

  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
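
/* For illustration (not part of the original sources): for

     int A[100];
     for (i = 0; i < n; i++)
       if (c[i])
         A[i] = 0;

   the index i has init 0 and step 1 and the array bounds are [0, 99], so
   the access is provably in bounds whenever max_loop_iterations shows the
   index cannot run past A[99]; such a conditional store can then be treated
   as an unconditional access even though it is predicated.  */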
/* Return TRUE if ref is a within bound array reference.  */

static bool
ref_within_array_bound (gimple *stmt, tree ref)
{
  struct loop *loop = loop_containing_stmt (stmt);

  gcc_assert (loop != NULL);
  return for_each_index (&ref, idx_within_array_bound, loop);
}

/* Given a memory reference expression T, return TRUE if base object
   it refers to is writable.  The base object of a memory reference
   is the main object being referenced, which is returned by function
   get_base_address.  */

static bool
base_object_writable (tree ref)
{
  tree base_tree = get_base_address (ref);

  return (base_tree
          && DECL_P (base_tree)
          && decl_binds_to_current_def_p (base_tree)
          && !TREE_READONLY (base_tree));
}
/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
     memory writes transforms the conditional memory writes into
     unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
     into "A[i] = cond ? foo : A[i]", and as the write to memory may not
     be executed at all in the original code, it may be a readonly
     memory.  To check that A is not const-qualified, we check that
     there exists at least an unconditional write to A in the current
     function.

   - reads or writes to memory are valid memory accesses for every
     iteration.  To check that the memory accesses are correctly formed
     and that we are allowed to read and write in these locations, we
     check that the memory accesses to be if-converted occur at every
     iteration unconditionally.

   Returns true for the memory reference in STMT, same memory reference
   is read or written unconditionally at least once and the base memory
   reference is written unconditionally once.  This is to check reference
   will not write fault.  Also returns true if the memory reference is
   unconditionally read once then we are conditionally writing to memory
   which is defined as read and write and is bound to the definition
   we are seeing.  */

static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
              || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
        return true;

      /* an unconditional write won't trap if the base is written
         to unconditionally.  */
      if (base_master_dr
          && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
        return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
        return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
    }

  return false;
}
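
/* For illustration (not part of the original sources): the rewrite of
   "if (cond) A[i] = foo" into the unconditional "A[i] = cond ? foo : A[i]"
   is only safe when the added read and write cannot fault and cannot
   introduce a store data race, e.g. when another access in the loop already
   touches A[i] on every iteration or the bounds analysis above proves the
   index stays inside a writable, non-readonly array; otherwise the store
   must be predicated (masked) or the loop versioned.  */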
/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb predicate).  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  /* Check whether this is a load or store.  */
  tree lhs = gimple_assign_lhs (stmt);
  bool is_load;
  tree ref;
  if (gimple_store_p (stmt))
    {
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
        return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
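
/* For illustration (not part of the original sources): on a target where
   can_vec_mask_load_store_p answers true, a predicated load such as

     if (c_1) x_2 = A[i_3];

   is later rewritten by predicate_load_or_store into

     x_2 = .MASK_LOAD (&A[i_3], align, c_1);

   so that the vectorizer can turn it into a masked vector load.  */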
/* Return true if STMT could be converted from an operation that is
   unconditional to one that is conditional on a bb predicate mask.  */

static bool
ifcvt_can_predicate (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);

  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || gimple_has_volatile_ops (stmt))
    return false;

  if (gimple_assign_single_p (stmt))
    return ifcvt_can_use_mask_load_store (stmt);

  tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  if (!types_compatible_p (lhs_type, rhs_type))
    return false;
  internal_fn cond_fn = get_conditional_internal_fn (code);
  return (cond_fn != IFN_LAST
          && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
}
/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
                                     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constrains might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_predicate (stmt))
        {
          gimple_set_plf (stmt, GF_PLF_2, true);
          need_to_predicate = true;
          return true;
        }
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "tree could trap...\n");
      return false;
    }

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}
/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is builtins call.  */

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      return true;

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
        tree fndecl = gimple_call_fndecl (stmt);
        if (fndecl)
          {
            int flags = gimple_call_flags (stmt);
            if ((flags & ECF_CONST)
                && !(flags & ECF_LOOPING_CONST_OR_PURE)
                /* We can only vectorize some builtins at the moment,
                   so restrict if-conversion to those.  */
                && fndecl_built_in_p (fndecl))
              return true;
          }
        return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "don't know what to do\n");
          print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
        }
      return false;
    }

  return false;
}
/* Assumes that BB has more than 1 predecessors.
   Returns false if at least one incoming edge of BB is not a critical
   edge and true otherwise.  */

static bool
all_preds_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) == 1)
      return false;
  return true;
}

/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  if (exit_bb)
    {
      if (bb != loop->latch)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "basic block after exit bb but before latch\n");
          return false;
        }
      else if (!empty_block_p (bb))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "non empty basic block after exit bb\n");
          return false;
        }
      else if (bb == loop->latch
               && bb != exit_bb
               && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "latch is not dominated by exit_block\n");
          return false;
        }
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, "Difficult to handle edges\n");
        return false;
      }

  return true;
}

/* Return true when all predecessor blocks of BB are visited.  The
   VISITED bitmap keeps track of the visited blocks.  */

static bool
pred_blocks_visited_p (basic_block bb, bitmap *visited)
{
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!bitmap_bit_p (*visited, e->src->index))
      return false;

  return true;
}
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  */

static basic_block *
get_loop_body_in_if_conv_order (const struct loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order[index];

      if (bb->flags & BB_IRREDUCIBLE_LOOP)
        {
          free (blocks_in_bfs_order);
          BITMAP_FREE (visited);
          free (blocks);
          return NULL;
        }

      if (!bitmap_bit_p (visited, bb->index))
        {
          if (pred_blocks_visited_p (bb, &visited)
              || bb == loop->header)
            {
              /* This block is now visited.  */
              bitmap_set_bit (visited, bb->index);
              blocks[visited_count++] = bb;
            }
        }

      index++;

      if (index == loop->num_nodes
          && visited_count != loop->num_nodes)
        /* Not done yet.  */
        index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);

  return blocks;
}
/* Returns true when the analysis of the predicates for all the basic
   blocks in LOOP succeeded.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
         have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
          || bb_with_exit_edge_p (loop, bb))
        {
          reset_bb_predicate (bb);
          continue;
        }

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
        {
          tree c2;
          edge true_edge, false_edge;
          location_t loc = gimple_location (stmt);
          tree c = build2_loc (loc, gimple_cond_code (stmt),
                               boolean_type_node,
                               gimple_cond_lhs (stmt),
                               gimple_cond_rhs (stmt));

          /* Add new condition into destination's predicate list.  */
          extract_true_false_edges_from_block (gimple_bb (stmt),
                                               &true_edge, &false_edge);

          /* If C is true, then TRUE_EDGE is taken.  */
          add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
                                     unshare_expr (c));

          /* If C is false, then FALSE_EDGE is taken.  */
          c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
                           unshare_expr (c));
          add_to_dst_predicate_list (loop, false_edge,
                                     unshare_expr (cond), c2);

          cond = NULL_TREE;
        }

      /* If current bb has only one successor, then consider it as an
         unconditional goto.  */
      if (single_succ_p (bb))
        {
          basic_block bb_n = single_succ (bb);

          /* The successor bb inherits the predicate of its
             predecessor.  If there is no predicate in the predecessor
             bb, then consider the successor bb as always executed.  */
          if (cond == NULL_TREE)
            cond = boolean_true_node;

          add_to_predicate_list (loop, bb_n, cond);
        }
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
              && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
/* Build region by adding loop pre-header and post-header blocks.  */

static vec<basic_block>
build_region (struct loop *loop)
{
  vec<basic_block> region = vNULL;
  basic_block exit_bb = NULL;

  gcc_assert (ifc_bbs);

  /* The first element is loop pre-header.  */
  region.safe_push (loop_preheader_edge (loop)->src);

  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      region.safe_push (bb);

      /* Find loop postheader.  */
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (loop_exit_edge_p (loop, e))
          {
            exit_bb = e->dest;
            break;
          }
    }

  /* The last element is loop post-header.  */
  gcc_assert (exit_bb);
  region.safe_push (exit_bb);

  return region;
}
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
    return false;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Allow statements that can be handled during if-conversion.  */
  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Irreducible loop\n");
      return false;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
        return false;

      if (bb_with_exit_edge_p (loop, bb))
        exit_bb = bb;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        switch (gimple_code (gsi_stmt (gsi)))
          {
          case GIMPLE_LABEL:
          case GIMPLE_ASSIGN:
          case GIMPLE_CALL:
          case GIMPLE_DEBUG:
          case GIMPLE_COND:
            gimple_set_uid (gsi_stmt (gsi), 0);
            break;
          default:
            return false;
          }
    }

  data_reference_p dr;

  innermost_DR_map
    = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
        gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
         memory reference, we synthesize its innermost loop behavior
         for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
          || TREE_CODE (ref) == IMAGPART_EXPR
          || TREE_CODE (ref) == REALPART_EXPR
          || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
               || DR_INIT (dr) || DR_STEP (dr)))
        {
          while (TREE_CODE (ref) == COMPONENT_REF
                 || TREE_CODE (ref) == IMAGPART_EXPR
                 || TREE_CODE (ref) == REALPART_EXPR)
            ref = TREE_OPERAND (ref, 0);

          memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
          DR_BASE_ADDRESS (dr) = ref;
        }

      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
        for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
          if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
            return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
        if (!if_convertible_phi_p (loop, bb, itr.phi ()))
          return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.  */

static bool
if_convertible_loop_p (struct loop *loop)
{
  edge e;
  edge_iterator ei;
  bool res = false;
  vec<data_reference_p> refs;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  refs.create (5);
  res = if_convertible_loop_p_1 (loop, &refs);

  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    free (dr->aux);

  free_data_refs (refs);

  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
        reduc_1 = PHI <..., reduc_2>
      ...
        if (...)
          reduc_3 = ...
        reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
                          tree *op0, tree *op1, bool extended)
{
  tree lhs, r_op1, r_op2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  struct loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
        result = true;
        break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);
  if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_op2 == PHI_RESULT (header_phi)
      && reduction_op == PLUS_EXPR)
    std::swap (r_op1, r_op2);
  else if (r_op1 != PHI_RESULT (header_phi))
    return false;

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;
      if (use_stmt == stmt)
        continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
        return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
                               tree cond, tree op0, tree op1, bool swap)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  tree zero = build_zero_cst (TREE_TYPE (rhs1));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
                            unshare_expr (cond),
                            swap ? zero : op1,
                            swap ? op1 : zero);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement.  */
  rhs = fold_build2 (gimple_assign_rhs_code (reduc),
                     TREE_TYPE (rhs1), op0, tmp);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
/* Produce condition for all occurrences of ARG in PHI node.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
                       gimple_stmt_iterator *gsi)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      if (is_true_predicate (c))
        {
          cond = c;
          break;
        }
      c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
                                      is_gimple_condexpr, NULL_TREE,
                                      true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
        {
          /* Must build OR expression.  */
          cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
          cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
                                             is_gimple_condexpr, NULL_TREE,
                                             true, GSI_SAME_STMT);
        }
      else
        cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}

/* Local valueization callback that follows all-use SSA edges.  */

static tree
ifcvt_follow_ssa_use_edges (tree val)
{
  return val;
}
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
   This routine can handle PHI nodes with more than two arguments.

   For example,
     S1: A = PHI <x1(1), x2(5)>
   is converted into:
     S2: A = cond ? x1 : x2;

   The generated code is inserted at GSI that points to the top of
   basic block's statement list.
   If PHI node has more than two arguments a chain of conditional
   expression is produced.  */

static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
  gimple *new_stmt = NULL, *reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  unsigned int max, args_len;
  edge e;
  basic_block bb;
  unsigned int i;

  res = gimple_phi_result (phi);
  if (virtual_operand_p (res))
    return;

  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
                                            res))
          && !chrec_contains_undetermined (scev)
          && scev != res
          && (rhs = gimple_phi_arg_def (phi, 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Degenerate phi!\n");
          print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
        }
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
      return;
    }

  bb = gimple_bb (phi);
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (first_edge->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
        std::swap (first_edge, second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
        {
          cond = bb_predicate (second_edge->src);
          if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
            cond = TREE_OPERAND (cond, 0);
          else
            first_edge = second_edge;
        }
      else
        cond = bb_predicate (first_edge->src);
      /* Gimplify the condition to a valid cond-expr conditional operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
                                         is_gimple_condexpr, NULL_TREE,
                                         true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      if (EDGE_PRED (bb, 1)->src == true_bb)
        {
          arg0 = gimple_phi_arg_def (phi, 1);
          arg1 = gimple_phi_arg_def (phi, 0);
        }
      else
        {
          arg0 = gimple_phi_arg_def (phi, 0);
          arg1 = gimple_phi_arg_def (phi, 1);
        }
      if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
                                    &op0, &op1, false))
        /* Convert reduction stmt into vectorizable form.  */
        rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
                                             true_bb != gimple_bb (reduc));
      else
        /* Build new RHS using selected condition and arguments.  */
        rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
                                    arg0, arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
        {
          new_stmt = gsi_stmt (new_gsi);
          update_stmt (new_stmt);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "new phi replacement stmt\n");
          print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
        }
      return;
    }
  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (phi);
  int max_ind = -1;
  /* Vector of different PHI argument values.  */
  auto_vec<tree> args (num_args);

  /* Compute phi_arg_map.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (phi, i);
      if (!phi_arg_map.get (arg))
        args.quick_push (arg);
      phi_arg_map.get_or_insert (arg).safe_push (i);
    }

  /* Determine element with max number of occurrences.  */
  max = 1;
  args_len = args.length ();
  for (i = 0; i < args_len; i++)
    {
      unsigned int len;
      if ((len = phi_arg_map.get (args[i])->length ()) > max)
        {
          max_ind = (int) i;
          max = len;
        }
    }

  /* Put element with max number of occurences to the end of ARGS.  */
  if (max_ind != -1 && max_ind + 1 != (int) args_len)
    std::swap (args[args_len - 1], args[max_ind]);

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
    {
      vec<int> *indexes;
      indexes = phi_arg_map.get (args[0]);
      index0 = (*indexes)[0];
      arg0 = args[0];
      arg1 = args[1];
      e = gimple_phi_arg_edge (phi, index0);
      cond = bb_predicate (e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
        {
          swap = true;
          cond = TREE_OPERAND (cond, 0);
        }
      /* Gimplify the condition to a valid cond-expr conditional operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
                                         is_gimple_condexpr, NULL_TREE,
                                         true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
                                      &op0, &op1, true)))
        rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
                                    swap ? arg1 : arg0,
                                    swap ? arg0 : arg1);
      else
        /* Convert reduction stmt into vectorizable form.  */
        rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
                                             swap);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
    }
  else
    {
      /* Common case: produce a chain of conditional expressions.  */
      vec<int> *indexes;
      tree type = TREE_TYPE (gimple_phi_result (phi));
      tree lhs;
      arg1 = args[1];
      for (i = 0; i < args_len; i++)
        {
          arg0 = args[i];
          indexes = phi_arg_map.get (args[i]);
          if (i != args_len - 1)
            lhs = make_temp_ssa_name (type, NULL, "_ifc_");
          else
            lhs = res;
          cond = gen_phi_arg_condition (phi, indexes, gsi);
          rhs = fold_build_cond_expr (type, unshare_expr (cond),
                                      arg0, arg1);
          new_stmt = gimple_build_assign (lhs, rhs);
          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
          update_stmt (new_stmt);
          arg1 = lhs;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "new extended phi replacement stmt\n");
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
   LOOP->header block with conditional modify expressions.  */

static void
predicate_all_scalar_phis (struct loop *loop)
{
  basic_block bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gphi *phi;
      gimple_stmt_iterator gsi;
      gphi_iterator phi_gsi;
      bb = ifc_bbs[i];

      if (bb == loop->header)
        continue;

      phi_gsi = gsi_start_phis (bb);
      if (gsi_end_p (phi_gsi))
        continue;

      gsi = gsi_after_labels (bb);
      while (!gsi_end_p (phi_gsi))
        {
          phi = phi_gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            gsi_next (&phi_gsi);
          else
            {
              predicate_scalar_phi (phi, &gsi);
              remove_phi_node (&phi_gsi, false);
            }
        }
    }
}
/* Insert in each basic block of LOOP the statements produced by the
   gimplification of the predicates.  */

static void
insert_gimplified_predicates (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_seq stmts;
      if (!is_predicated (bb))
        gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
      if (!is_predicated (bb))
        {
          /* Do not insert statements for a basic block that is not
             predicated.  Also make sure that the predicate of the
             basic block is set to true.  */
          reset_bb_predicate (bb);
          continue;
        }

      stmts = bb_predicate_gimplified_stmts (bb);
      if (stmts)
        {
          if (need_to_predicate)
            {
              /* Insert the predicate of the BB just after the label,
                 as the if-conversion of memory writes will use this
                 predicate.  */
              gimple_stmt_iterator gsi = gsi_after_labels (bb);
              gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
            }
          else
            {
              /* Insert the predicate of the BB at the end of the BB
                 as this would reduce the register pressure: the only
                 use of this predicate will be in successor BBs.  */
              gimple_stmt_iterator gsi = gsi_last_bb (bb);

              if (gsi_end_p (gsi)
                  || stmt_ends_bb_p (gsi_stmt (gsi)))
                gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
              else
                gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
            }

          /* Once the sequence is code generated, set it to NULL.  */
          set_bb_predicate_gimplified_stmts (bb, NULL);
        }
    }
}
/* Helper function for predicate_statements.  Returns index of existent
   mask if it was created for given SIZE and -1 otherwise.  */

static int
mask_exists (int size, vec<int> vec)
{
  unsigned int ix;
  int v;
  FOR_EACH_VEC_ELT (vec, ix, v)
    if (v == size)
      return (int) ix;
  return -1;
}
/* Helper function for predicate_statements.  STMT is a memory read or
   write and it needs to be predicated by MASK.  Return a statement
   that does so.  */

static gimple *
predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
{
  gcall *new_stmt;

  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
  mark_addressable (ref);
  tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
                                        true, NULL_TREE, true, GSI_SAME_STMT);
  tree ptr = build_int_cst (reference_alias_ptr_type (ref),
                            get_object_alignment (ref));
  /* Copy points-to info if possible.  */
  if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
    copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
                   ref);
  if (TREE_CODE (lhs) == SSA_NAME)
    {
      new_stmt
        = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
                                      ptr, mask);
      gimple_call_set_lhs (new_stmt, lhs);
      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
    }
  else
    {
      new_stmt
        = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
                                      mask, rhs);
      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
      SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
    }
  gimple_call_set_nothrow (new_stmt, true);
  return new_stmt;
}
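
/* For illustration (not part of the original sources): a predicated store

     if (c_1) A[i_2] = x_3;

   becomes, once c_1 has been materialized as the statement mask,

     .MASK_STORE (&A[i_2], alignment, c_1, x_3);

   which writes nothing when the mask is false, so the statement is safe to
   execute unconditionally in the flattened loop body.  */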
/* STMT uses OP_LHS.  Check whether it is equivalent to:

     ... = OP_MASK ? OP_LHS : X;

   Return X if so, otherwise return null.  OP_MASK is an SSA_NAME that is
   known to have value OP_COND.  */

static tree
check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
                           tree op_lhs)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
    return NULL_TREE;

  tree use_cond = gimple_assign_rhs1 (assign);
  tree if_true = gimple_assign_rhs2 (assign);
  tree if_false = gimple_assign_rhs3 (assign);

  if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
      && if_true == op_lhs)
    return if_false;

  if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
    return if_true;

  return NULL_TREE;
}

/* Return true if VALUE is available for use at STMT.  SSA_NAMES is
   the set of SSA names defined earlier in STMT's block.  */

static bool
value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
                   tree value)
{
  if (is_gimple_min_invariant (value))
    return true;

  if (TREE_CODE (value) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (value))
        return true;

      basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
      basic_block use_bb = gimple_bb (stmt);
      return (def_bb == use_bb
              ? ssa_names->contains (value)
              : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
    }

  return false;
}

/* Helper function for predicate_statements.  STMT is a potentially-trapping
   arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
   has value COND.  Return a statement that does so.  SSA_NAMES is the set of
   SSA names defined earlier in STMT's block.  */

static gimple *
predicate_rhs_code (gassign *stmt, tree mask, tree cond,
                    hash_set<tree_ssa_name_hash> *ssa_names)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);
  internal_fn cond_fn = get_conditional_internal_fn (code);

  /* Construct the arguments to the conditional internal function.  */
  auto_vec<tree, 8> args;
  args.safe_grow (nops + 1);
  args[0] = mask;
  for (unsigned int i = 1; i < nops; ++i)
    args[i] = gimple_op (stmt, i);
  args[nops] = NULL_TREE;

  /* Look for uses of the result to see whether they are COND_EXPRs that can
     be folded into the conditional call.  */
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
    {
      tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
      if (new_else && value_available_p (stmt, ssa_names, new_else))
        {
          if (!args[nops])
            args[nops] = new_else;
          if (operand_equal_p (new_else, args[nops], 0))
            {
              /* We have:

                   LHS = IFN_COND (MASK, ..., ELSE);
                   X = MASK ? LHS : ELSE;

                 which makes X equivalent to LHS.  */
              tree use_lhs = gimple_assign_lhs (use_stmt);
              redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
            }
        }
    }
  if (!args[nops])
    args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
                                               nops - 1, &args[1]);

  /* Create and insert the call.  */
  gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
  gimple_call_set_lhs (new_stmt, lhs);
  gimple_call_set_nothrow (new_stmt, true);

  return new_stmt;
}
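
/* A sketch of the rewrite performed above, with made-up SSA names and
   assuming the target supports a conditional add:

     a_5 = b_6 + c_7;    <-- executed only when COND holds

   becomes

     a_5 = IFN_COND_ADD (MASK, b_6, c_7, ELSE);

   where ELSE is either the X operand of a redundant COND_EXPR use found
   by check_redundant_cond_expr, or else the value returned by
   targetm.preferred_else_value.  A use of the form

     x_8 = MASK ? a_5 : ELSE;

   is then equivalent to a_5 and is queued in redundant_ssa_names so
   that ifcvt_local_dce can clean it up.  */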

/* Predicate each write to memory in LOOP.

   This function transforms control flow constructs containing memory
   writes of the form:

   | for (i = 0; i < N; i++)
   |   if (cond)
   |     A[i] = expr;

   into the following form that does not contain control flow:

   | for (i = 0; i < N; i++)
   |   A[i] = cond ? expr : A[i];

   The original CFG looks like this:

   | bb_0
   |   i = 0
   | bb_1
   |   if (i < N) goto bb_5 else goto bb_2
   | bb_2
   |   cond = some_computation;
   |   if (cond) goto bb_3 else goto bb_4
   | bb_3
   |   A[i] = expr;
   | bb_4
   |   goto bb_1
   | bb_5
   |   return A;

   insert_gimplified_predicates inserts the computation of the COND
   expression at the beginning of the destination basic block:

   | bb_2
   |   cond = some_computation;
   |   if (cond) goto bb_3 else goto bb_4
   | bb_3
   |   cond = some_computation;
   |   A[i] = expr;

   predicate_statements then predicates the memory write as follows:

   | bb_2
   |   if (cond) goto bb_3 else goto bb_4
   | bb_3
   |   cond = some_computation;
   |   A[i] = cond ? expr : A[i];

   and finally combine_blocks removes the basic block boundaries, making
   the loop vectorizable:

   | bb_0
   |   i = 0
   |   if (i < N) goto bb_5 else goto bb_1
   | bb_1
   |   cond = some_computation;
   |   A[i] = cond ? expr : A[i];
   |   if (i < N) goto bb_5 else goto bb_4
   | bb_4
   |   goto bb_1
   | bb_5
   |   return A;  */

static void
predicate_statements (loop_p loop)
{
  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
  auto_vec<int, 1> vect_sizes;
  auto_vec<tree, 1> vect_masks;
  hash_set<tree_ssa_name_hash> ssa_names;

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      basic_block bb = ifc_bbs[i];
      tree cond = bb_predicate (bb);
      bool swap;
      int index;

      if (is_true_predicate (cond))
        continue;

      swap = false;
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
        {
          swap = true;
          cond = TREE_OPERAND (cond, 0);
        }

      vect_sizes.truncate (0);
      vect_masks.truncate (0);

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
        {
          gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
          if (!stmt)
            ;
          else if (is_false_predicate (cond)
                   && gimple_vdef (stmt))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&gsi, true);
              release_defs (stmt);
              continue;
            }
          else if (gimple_plf (stmt, GF_PLF_2))
            {
              tree lhs = gimple_assign_lhs (stmt);
              tree mask;
              gimple *new_stmt;
              gimple_seq stmts = NULL;
              machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
              /* We checked before setting GF_PLF_2 that an equivalent
                 integer mode exists.  */
              int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
              if (!vect_sizes.is_empty ()
                  && (index = mask_exists (bitsize, vect_sizes)) != -1)
                /* Use created mask.  */
                mask = vect_masks[index];
              else
                {
                  if (COMPARISON_CLASS_P (cond))
                    mask = gimple_build (&stmts, TREE_CODE (cond),
                                         boolean_type_node,
                                         TREE_OPERAND (cond, 0),
                                         TREE_OPERAND (cond, 1));
                  else
                    mask = cond;

                  if (swap)
                    {
                      tree true_val
                        = constant_boolean_node (true, TREE_TYPE (mask));
                      mask = gimple_build (&stmts, BIT_XOR_EXPR,
                                           TREE_TYPE (mask), mask, true_val);
                    }
                  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

                  /* Save mask and its size for further use.  */
                  vect_sizes.safe_push (bitsize);
                  vect_masks.safe_push (mask);
                }
              if (gimple_assign_single_p (stmt))
                new_stmt = predicate_load_or_store (&gsi, stmt, mask);
              else
                new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);

              gsi_replace (&gsi, new_stmt, true);
            }
          else if (gimple_vdef (stmt))
            {
              tree lhs = gimple_assign_lhs (stmt);
              tree rhs = gimple_assign_rhs1 (stmt);
              tree type = TREE_TYPE (lhs);

              lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
              rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
              if (swap)
                std::swap (lhs, rhs);
              cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
                                                 is_gimple_condexpr, NULL_TREE,
                                                 true, GSI_SAME_STMT);
              rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
              gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
              update_stmt (stmt);
            }
          tree lhs = gimple_get_lhs (gsi_stmt (gsi));
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            ssa_names.add (lhs);
          gsi_next (&gsi);
        }
      ssa_names.empty ();
    }
}

/* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
   other than the exit and latch of the LOOP.  Also resets the
   GIMPLE_DEBUG information.  */

static void
remove_conditions_and_labels (loop_p loop)
{
  gimple_stmt_iterator gsi;
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (bb_with_exit_edge_p (loop, bb)
          || bb == loop->latch)
        continue;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
        switch (gimple_code (gsi_stmt (gsi)))
          {
          case GIMPLE_COND:
          case GIMPLE_LABEL:
            gsi_remove (&gsi, true);
            break;

          case GIMPLE_DEBUG:
            /* ??? Should there be conditional GIMPLE_DEBUG_BINDs?  */
            if (gimple_debug_bind_p (gsi_stmt (gsi)))
              {
                gimple_debug_bind_reset_value (gsi_stmt (gsi));
                update_stmt (gsi_stmt (gsi));
              }
            gsi_next (&gsi);
            break;

          default:
            gsi_next (&gsi);
          }
    }
}

/* Combine all the basic blocks from LOOP into one or two super basic
   blocks.  Replace PHI nodes with conditional modify expressions.  */

static void
combine_blocks (struct loop *loop)
{
  basic_block bb, exit_bb, merge_target_bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;
  edge e;
  edge_iterator ei;

  remove_conditions_and_labels (loop);
  insert_gimplified_predicates (loop);
  predicate_all_scalar_phis (loop);

  if (need_to_predicate)
    predicate_statements (loop);

  /* Merge basic blocks: first remove all the edges in the loop,
     except for those from the exit block.  */
  exit_bb = NULL;
  bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
  for (i = 0; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];
      predicated[i] = !is_true_predicate (bb_predicate (bb));
      free_bb_predicate (bb);
      if (bb_with_exit_edge_p (loop, bb))
        {
          gcc_assert (exit_bb == NULL);
          exit_bb = bb;
        }
    }
  gcc_assert (exit_bb != loop->latch);

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
        {
          if (e->src == exit_bb)
            ei_next (&ei);
          else
            remove_edge (e);
        }
    }

  if (exit_bb != NULL)
    {
      if (exit_bb != loop->header)
        {
          /* Connect this node to loop header.  */
          make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
          set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
        }

      /* Redirect non-exit edges to loop->latch.  */
      FOR_EACH_EDGE (e, ei, exit_bb->succs)
        if (!loop_exit_edge_p (loop, e))
          redirect_edge_and_branch (e, loop->latch);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
    }
  else
    {
      /* If the loop does not have an exit, reconnect header and latch.  */
      make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
    }

  merge_target_bb = loop->header;

  /* Get at the virtual def valid for uses starting at the first block
     we merge into the header.  Without a virtual PHI the loop has the
     same virtual use on all stmts.  */
  gphi *vphi = get_virtual_phi (loop->header);
  tree last_vdef = NULL_TREE;
  if (vphi)
    {
      last_vdef = gimple_phi_result (vphi);
      for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
           ! gsi_end_p (gsi); gsi_next (&gsi))
        if (gimple_vdef (gsi_stmt (gsi)))
          last_vdef = gimple_vdef (gsi_stmt (gsi));
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      gimple_stmt_iterator last;

      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
        continue;

      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
         after the loop.  */
      vphi = get_virtual_phi (bb);
      if (vphi)
        {
          imm_use_iterator iter;
          use_operand_p use_p;
          gimple *use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
            FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
              SET_USE (use_p, last_vdef);
          gsi = gsi_for_stmt (vphi);
          remove_phi_node (&gsi, true);
        }

      /* Make stmts member of loop->header and clear range info from all
         stmts in BB, which is now no longer executed conditionally on a
         predicate we could have derived it from.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_bb (stmt, merge_target_bb);
          /* Update virtual operands.  */
          if (last_vdef)
            {
              use_operand_p use_p = ssa_vuse_operand (stmt);
              if (use_p
                  && USE_FROM_PTR (use_p) != last_vdef)
                SET_USE (use_p, last_vdef);
              if (gimple_vdef (stmt))
                last_vdef = gimple_vdef (stmt);
            }
          if (predicated[i])
            {
              ssa_op_iter i;
              tree op;
              FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
                reset_flow_sensitive_info (op);
            }
        }

      /* Update stmt list.  */
      last = gsi_last_bb (merge_target_bb);
      gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
      set_bb_seq (bb, NULL);

      delete_basic_block (bb);
    }

  /* If possible, merge loop header to the block with the exit edge.
     This reduces the number of basic blocks to two, to please the
     vectorizer that handles only loops with two nodes.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
         after the loop.  */
      vphi = get_virtual_phi (exit_bb);
      if (vphi)
        {
          imm_use_iterator iter;
          use_operand_p use_p;
          gimple *use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
            FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
              SET_USE (use_p, last_vdef);
          gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
          remove_phi_node (&gsi, true);
        }

      if (can_merge_blocks_p (loop->header, exit_bb))
        merge_blocks (loop->header, exit_bb);
    }

  free (predicated);
}

/* Version LOOP before if-converting it; the original loop
   will be if-converted, the new copy of the loop will not,
   and the LOOP_VECTORIZED internal call will guard which
   loop to execute.  The vectorizer pass will fold this
   internal call into either true or false.

   Note that this function intentionally invalidates profile.  Both edges
   out of LOOP_VECTORIZED must have 100% probability so the profile remains
   consistent after the condition is folded in the vectorizer.  */

static struct loop *
version_loop_for_if_conversion (struct loop *loop)
{
  basic_block cond_bb;
  tree cond = make_ssa_name (boolean_type_node);
  struct loop *new_loop;
  gimple *g;
  gimple_stmt_iterator gsi;
  unsigned int save_length;

  g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
                                  build_int_cst (integer_type_node, loop->num),
                                  integer_zero_node);
  gimple_call_set_lhs (g, cond);

  /* Save BB->aux around loop_version as that uses the same field.  */
  save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
  void **saved_preds = XALLOCAVEC (void *, save_length);
  for (unsigned i = 0; i < save_length; i++)
    saved_preds[i] = ifc_bbs[i]->aux;

  initialize_original_copy_tables ();
  /* At this point we invalidate profile consistency until
     IFN_LOOP_VECTORIZED is re-merged in the vectorizer.  */
  new_loop = loop_version (loop, cond, &cond_bb,
                           profile_probability::always (),
                           profile_probability::always (),
                           profile_probability::always (),
                           profile_probability::always (), true);
  free_original_copy_tables ();

  for (unsigned i = 0; i < save_length; i++)
    ifc_bbs[i]->aux = saved_preds[i];

  if (new_loop == NULL)
    return NULL;

  new_loop->dont_vectorize = true;
  new_loop->force_vectorize = false;
  gsi = gsi_last_bb (cond_bb);
  gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  update_ssa (TODO_update_ssa);
  return new_loop;
}
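
/* The resulting structure, sketched at the source level (loop numbers
   are placeholders):

     if (LOOP_VECTORIZED (<LOOP->num>, <copy->num>))
       <LOOP>            <-- will be if-converted and vectorized
     else
       <copy of LOOP>    <-- dont_vectorize set, stays scalar

   The vectorizer later folds the LOOP_VECTORIZED call to true or false
   and the dead version is removed.  */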

/* Return true when LOOP satisfies the following conditions that will
   allow it to be recognized by the vectorizer for outer-loop
   vectorization:
    - The loop is not the root node of the loop tree.
    - The loop has exactly one inner loop.
    - The loop has a single exit.
    - The loop header has a single successor, which is the inner
      loop header.
    - Each of the inner and outer loop latches has a single
      predecessor.
    - The loop exit block has a single predecessor, which is the
      inner loop's exit block.  */

static bool
versionable_outer_loop_p (struct loop *loop)
{
  if (!loop_outer (loop)
      || loop->dont_vectorize
      || !loop->inner
      || loop->inner->next
      || !single_exit (loop)
      || !single_succ_p (loop->header)
      || single_succ (loop->header) != loop->inner->header
      || !single_pred_p (loop->latch)
      || !single_pred_p (loop->inner->latch))
    return false;

  basic_block outer_exit = single_pred (loop->latch);
  basic_block inner_exit = single_pred (loop->inner->latch);

  if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
    return false;

  if (dump_file)
    fprintf (dump_file, "Found vectorizable outer loop for versioning\n");

  return true;
}

/* Perform splitting of critical edges.  Skip splitting and return false
   if LOOP will not be converted because:

     - LOOP is not well formed.
     - LOOP has a PHI with more than MAX_PHI_ARG_NUM arguments.

   The last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */

static bool
ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
{
  basic_block *body;
  basic_block bb;
  unsigned int num = loop->num_nodes;
  unsigned int i;
  gimple *stmt;
  edge e;
  edge_iterator ei;
  auto_vec<edge> critical_edges;

  /* Loop is not well formed.  */
  if (num <= 2 || loop->inner || !single_exit (loop))
    return false;

  body = get_loop_body (loop);
  for (i = 0; i < num; i++)
    {
      bb = body[i];
      if (!aggressive_if_conv
          && phi_nodes (bb)
          && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "BB %d has complicated PHI with more than %u args.\n",
                     bb->index, MAX_PHI_ARG_NUM);

          free (body);
          return false;
        }
      if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
        continue;

      stmt = last_stmt (bb);
      /* Skip basic blocks not ending with conditional branch.  */
      if (!stmt || gimple_code (stmt) != GIMPLE_COND)
        continue;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
          critical_edges.safe_push (e);
    }
  free (body);

  /* Split critical edges.  */
  while (critical_edges.length () > 0)
    {
      e = critical_edges.pop ();
      /* Don't split if bb can be predicated along non-critical edge.  */
      if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
        continue;

      split_edge (e);
    }

  return true;
}
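
/* For illustration: an edge is critical when its source has several
   successors and its destination has several predecessors, e.g.

     bb_2: if (c) goto bb_3; else goto bb_4;
     bb_3: ...; goto bb_4;

   Here the edge bb_2 -> bb_4 is critical.  Splitting it inserts an
   empty block on the edge, which later gives predication a place to
   attach the predicate that holds only along that edge.  */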

/* Delete redundant statements produced by predication which prevents
   loop vectorization.  */

static void
ifcvt_local_dce (basic_block bb)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;
  std::pair <tree, tree> *name_pair;
  unsigned int i;

  FOR_EACH_VEC_ELT (redundant_ssa_names, i, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  worklist.create (64);
  /* Consider all phi as live statements.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      gimple_set_plf (phi, GF_PLF_2, true);
      worklist.safe_push (phi);
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_store_p (stmt)
          || gimple_assign_load_p (stmt)
          || is_gimple_debug (stmt))
        {
          gimple_set_plf (stmt, GF_PLF_2, true);
          worklist.safe_push (stmt);
          continue;
        }
      code = gimple_code (stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL)
        {
          gimple_set_plf (stmt, GF_PLF_2, true);
          worklist.safe_push (stmt);
          continue;
        }
      gimple_set_plf (stmt, GF_PLF_2, false);

      if (code == GIMPLE_ASSIGN)
        {
          tree lhs = gimple_assign_lhs (stmt);
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              stmt1 = USE_STMT (use_p);
              if (gimple_bb (stmt1) != bb)
                {
                  gimple_set_plf (stmt, GF_PLF_2, true);
                  worklist.safe_push (stmt);
                  break;
                }
            }
        }
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          use = USE_FROM_PTR (use_p);
          if (TREE_CODE (use) != SSA_NAME)
            continue;
          stmt1 = SSA_NAME_DEF_STMT (use);
          if (gimple_bb (stmt1) != bb
              || gimple_plf (stmt1, GF_PLF_2))
            continue;
          gimple_set_plf (stmt1, GF_PLF_2, true);
          worklist.safe_push (stmt1);
        }
    }
  /* Delete dead statements.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_plf (stmt, GF_PLF_2))
        {
          gsi_next (&gsi);
          continue;
        }
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
          print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
        }
      gsi_remove (&gsi, true);
      release_defs (stmt);
    }
}

/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  */

static unsigned int
tree_if_conversion (struct loop *loop)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  struct loop *rloop;

 again:
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_predicate = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      struct loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
        aggressive_if_conv = true;
    }

  if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  if (!if_convertible_loop_p (loop)
      || !dbg_cnt (if_conversion_tree))
    goto cleanup;

  if ((need_to_predicate || any_complicated_phi)
      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
          || loop->dont_vectorize))
    goto cleanup;

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      struct loop *vloop
        = (versionable_outer_loop_p (loop_outer (loop))
           ? loop_outer (loop) : loop);
      struct loop *nloop = version_loop_for_if_conversion (vloop);
      if (nloop == NULL)
        goto cleanup;
      if (vloop != loop)
        {
          /* If versionable_outer_loop_p decided to version the
             outer loop, version also the inner loop of the non-vectorized
             loop copy.  So we transform:
              loop1
                loop2
             into:
              if (LOOP_VECTORIZED (1, 3))
                {
                  loop1
                    loop2
                }
              else
                loop3 (copy of loop1)
                  if (LOOP_VECTORIZED (4, 5))
                    loop4 (copy of loop2)
                  else
                    loop5 (copy of loop4)  */
          gcc_assert (nloop->inner && nloop->inner->next == NULL);
          rloop = nloop->inner;
        }
    }

  /* Now all statements are if-convertible.  Combine all the basic
     blocks into one huge basic block doing the if-conversion
     on-the-fly.  */
  combine_blocks (loop);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop->header);

  todo |= TODO_cleanup_cfg;

 cleanup:
  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
        free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  if (rloop != NULL)
    {
      loop = rloop;
      goto again;
    }

  return todo;
}

/* Tree if-conversion pass management.  */

namespace {

const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *);

}; // class pass_if_conversion

bool
pass_if_conversion::gate (function *fun)
{
  return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
           && flag_tree_loop_if_convert != 0)
          || flag_tree_loop_if_convert == 1);
}

unsigned int
pass_if_conversion::execute (function *fun)
{
  struct loop *loop;
  unsigned todo = 0;

  if (number_of_loops (fun) <= 1)
    return 0;

  FOR_EACH_LOOP (loop, 0)
    if (flag_tree_loop_if_convert == 1
        || ((flag_tree_loop_vectorize || loop->force_vectorize)
            && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop);

  if (todo)
    {
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  if (flag_checking)
    {
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
        gcc_assert (!bb->aux);
    }

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_if_conversion (gcc::context *ctxt)
{
  return new pass_if_conversion (ctxt);
}