1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
43 # i_23 = PHI <0(0), i_18(10)>;
46 if (j_15 > 41) goto <L1>; else goto <L17>;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
67 # i_23 = PHI <0(0), i_18(10)>;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
83 #define INCLUDE_ALGORITHM
86 #include "coretypes.h"
92 #include "tree-pass.h"
96 #include "optabs-tree.h"
97 #include "gimple-pretty-print.h"
99 #include "fold-const.h"
100 #include "stor-layout.h"
101 #include "gimple-iterator.h"
102 #include "gimple-fold.h"
103 #include "gimplify.h"
104 #include "gimplify-me.h"
105 #include "tree-cfg.h"
106 #include "tree-into-ssa.h"
107 #include "tree-ssa.h"
109 #include "tree-data-ref.h"
110 #include "tree-scalar-evolution.h"
111 #include "tree-ssa-loop.h"
112 #include "tree-ssa-loop-niter.h"
113 #include "tree-ssa-loop-ivopts.h"
114 #include "tree-ssa-address.h"
116 #include "tree-hash-traits.h"
118 #include "builtins.h"
120 #include "internal-fn.h"
121 #include "fold-const.h"
122 #include "tree-ssa-sccvn.h"
123 #include "tree-cfgcleanup.h"
124 #include "tree-ssa-dse.h"
125 #include "tree-vectorizer.h"
129 /* For lang_hooks.types.type_for_mode. */
130 #include "langhooks.h"
/* Only handle PHIs with no more arguments unless we are asked to by
   the max-tree-if-conversion-phi-args parameter.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any ifcvting to be done.  */
static bool need_to_ifcvt;
160 /* Hash for struct innermost_loop_behavior. It depends on the user to
163 struct innermost_loop_behavior_hash
: nofree_ptr_hash
<innermost_loop_behavior
>
165 static inline hashval_t
hash (const value_type
&);
166 static inline bool equal (const value_type
&,
167 const compare_type
&);
171 innermost_loop_behavior_hash::hash (const value_type
&e
)
175 hash
= iterative_hash_expr (e
->base_address
, 0);
176 hash
= iterative_hash_expr (e
->offset
, hash
);
177 hash
= iterative_hash_expr (e
->init
, hash
);
178 return iterative_hash_expr (e
->step
, hash
);
182 innermost_loop_behavior_hash::equal (const value_type
&e1
,
183 const compare_type
&e2
)
185 if ((e1
->base_address
&& !e2
->base_address
)
186 || (!e1
->base_address
&& e2
->base_address
)
187 || (!e1
->offset
&& e2
->offset
)
188 || (e1
->offset
&& !e2
->offset
)
189 || (!e1
->init
&& e2
->init
)
190 || (e1
->init
&& !e2
->init
)
191 || (!e1
->step
&& e2
->step
)
192 || (e1
->step
&& !e2
->step
))
195 if (e1
->base_address
&& e2
->base_address
196 && !operand_equal_p (e1
->base_address
, e2
->base_address
, 0))
198 if (e1
->offset
&& e2
->offset
199 && !operand_equal_p (e1
->offset
, e2
->offset
, 0))
201 if (e1
->init
&& e2
->init
202 && !operand_equal_p (e1
->init
, e2
->init
, 0))
204 if (e1
->step
&& e2
->step
205 && !operand_equal_p (e1
->step
, e2
->step
, 0))
/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;
224 /* Structure used to predicate basic blocks. This is attached to the
225 ->aux field of the BBs in the loop to be if-converted. */
226 struct bb_predicate
{
228 /* The condition under which this basic block is executed. */
231 /* PREDICATE is gimplified, and the sequence of statements is
232 recorded here, in order to avoid the duplication of computations
233 that occur in previous conditions. See PR44483. */
234 gimple_seq predicate_gimplified_stmts
;
236 /* Records the number of statements recorded into
237 PREDICATE_GIMPLIFIED_STMTS. */
238 unsigned no_predicate_stmts
;
241 /* Returns true when the basic block BB has a predicate. */
244 bb_has_predicate (basic_block bb
)
246 return bb
->aux
!= NULL
;
249 /* Returns the gimplified predicate for basic block BB. */
252 bb_predicate (basic_block bb
)
254 return ((struct bb_predicate
*) bb
->aux
)->predicate
;
257 /* Sets the gimplified predicate COND for basic block BB. */
260 set_bb_predicate (basic_block bb
, tree cond
)
262 auto aux
= (struct bb_predicate
*) bb
->aux
;
263 gcc_assert ((TREE_CODE (cond
) == TRUTH_NOT_EXPR
264 && is_gimple_val (TREE_OPERAND (cond
, 0)))
265 || is_gimple_val (cond
));
266 aux
->predicate
= cond
;
267 aux
->no_predicate_stmts
++;
269 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
270 fprintf (dump_file
, "Recording block %d value %d\n", bb
->index
,
271 aux
->no_predicate_stmts
);
274 /* Returns the sequence of statements of the gimplification of the
275 predicate for basic block BB. */
277 static inline gimple_seq
278 bb_predicate_gimplified_stmts (basic_block bb
)
280 return ((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
;
283 /* Sets the sequence of statements STMTS of the gimplification of the
284 predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
288 set_bb_predicate_gimplified_stmts (basic_block bb
, gimple_seq stmts
,
289 bool preserve_counts
)
291 ((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
= stmts
;
292 if (stmts
== NULL
&& !preserve_counts
)
293 ((struct bb_predicate
*) bb
->aux
)->no_predicate_stmts
= 0;
296 /* Adds the sequence of statements STMTS to the sequence of statements
297 of the predicate for basic block BB. */
300 add_bb_predicate_gimplified_stmts (basic_block bb
, gimple_seq stmts
)
302 /* We might have updated some stmts in STMTS via force_gimple_operand
303 calling fold_stmt and that producing multiple stmts. Delink immediate
304 uses so update_ssa after loop versioning doesn't get confused for
305 the not yet inserted predicates.
306 ??? This should go away once we reliably avoid updating stmts
308 for (gimple_stmt_iterator gsi
= gsi_start (stmts
);
309 !gsi_end_p (gsi
); gsi_next (&gsi
))
311 gimple
*stmt
= gsi_stmt (gsi
);
312 delink_stmt_imm_use (stmt
);
313 gimple_set_modified (stmt
, true);
314 ((struct bb_predicate
*) bb
->aux
)->no_predicate_stmts
++;
316 gimple_seq_add_seq_without_update
317 (&(((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
), stmts
);
320 /* Return the number of statements the predicate of the basic block consists
323 static inline unsigned
324 get_bb_num_predicate_stmts (basic_block bb
)
326 return ((struct bb_predicate
*) bb
->aux
)->no_predicate_stmts
;
329 /* Initializes to TRUE the predicate of basic block BB. */
332 init_bb_predicate (basic_block bb
)
334 bb
->aux
= XNEW (struct bb_predicate
);
335 set_bb_predicate_gimplified_stmts (bb
, NULL
, false);
336 set_bb_predicate (bb
, boolean_true_node
);
339 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
342 release_bb_predicate (basic_block bb
)
344 gimple_seq stmts
= bb_predicate_gimplified_stmts (bb
);
347 /* Ensure that these stmts haven't yet been added to a bb. */
349 for (gimple_stmt_iterator i
= gsi_start (stmts
);
350 !gsi_end_p (i
); gsi_next (&i
))
351 gcc_assert (! gimple_bb (gsi_stmt (i
)));
354 gimple_seq_discard (stmts
);
355 set_bb_predicate_gimplified_stmts (bb
, NULL
, false);
359 /* Free the predicate of basic block BB. */
362 free_bb_predicate (basic_block bb
)
364 if (!bb_has_predicate (bb
))
367 release_bb_predicate (bb
);
372 /* Reinitialize predicate of BB with the true predicate. */
375 reset_bb_predicate (basic_block bb
)
377 if (!bb_has_predicate (bb
))
378 init_bb_predicate (bb
);
381 release_bb_predicate (bb
);
382 set_bb_predicate (bb
, boolean_true_node
);
386 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
387 the expression EXPR. Inserts the statement created for this
388 computation before GSI and leaves the iterator GSI at the same
392 ifc_temp_var (tree type
, tree expr
, gimple_stmt_iterator
*gsi
)
394 tree new_name
= make_temp_ssa_name (type
, NULL
, "_ifc_");
395 gimple
*stmt
= gimple_build_assign (new_name
, expr
);
396 gimple_set_vuse (stmt
, gimple_vuse (gsi_stmt (*gsi
)));
397 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
401 /* Return true when COND is a false predicate. */
404 is_false_predicate (tree cond
)
406 return (cond
!= NULL_TREE
407 && (cond
== boolean_false_node
408 || integer_zerop (cond
)));
411 /* Return true when COND is a true predicate. */
414 is_true_predicate (tree cond
)
416 return (cond
== NULL_TREE
417 || cond
== boolean_true_node
418 || integer_onep (cond
));
421 /* Returns true when BB has a predicate that is not trivial: true or
425 is_predicated (basic_block bb
)
427 return !is_true_predicate (bb_predicate (bb
));
430 /* Parses the predicate COND and returns its comparison code and
431 operands OP0 and OP1. */
433 static enum tree_code
434 parse_predicate (tree cond
, tree
*op0
, tree
*op1
)
438 if (TREE_CODE (cond
) == SSA_NAME
439 && is_gimple_assign (s
= SSA_NAME_DEF_STMT (cond
)))
441 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s
)) == tcc_comparison
)
443 *op0
= gimple_assign_rhs1 (s
);
444 *op1
= gimple_assign_rhs2 (s
);
445 return gimple_assign_rhs_code (s
);
448 else if (gimple_assign_rhs_code (s
) == TRUTH_NOT_EXPR
)
450 tree op
= gimple_assign_rhs1 (s
);
451 tree type
= TREE_TYPE (op
);
452 enum tree_code code
= parse_predicate (op
, op0
, op1
);
454 return code
== ERROR_MARK
? ERROR_MARK
455 : invert_tree_comparison (code
, HONOR_NANS (type
));
461 if (COMPARISON_CLASS_P (cond
))
463 *op0
= TREE_OPERAND (cond
, 0);
464 *op1
= TREE_OPERAND (cond
, 1);
465 return TREE_CODE (cond
);
471 /* Returns the fold of predicate C1 OR C2 at location LOC. */
474 fold_or_predicates (location_t loc
, tree c1
, tree c2
)
476 tree op1a
, op1b
, op2a
, op2b
;
477 enum tree_code code1
= parse_predicate (c1
, &op1a
, &op1b
);
478 enum tree_code code2
= parse_predicate (c2
, &op2a
, &op2b
);
480 if (code1
!= ERROR_MARK
&& code2
!= ERROR_MARK
)
482 tree t
= maybe_fold_or_comparisons (boolean_type_node
, code1
, op1a
, op1b
,
488 return fold_build2_loc (loc
, TRUTH_OR_EXPR
, boolean_type_node
, c1
, c2
);
491 /* Returns either a COND_EXPR or the folded expression if the folded
492 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
493 a constant or a SSA_NAME. */
496 fold_build_cond_expr (tree type
, tree cond
, tree rhs
, tree lhs
)
498 /* If COND is comparison r != 0 and r has boolean type, convert COND
499 to SSA_NAME to accept by vect bool pattern. */
500 if (TREE_CODE (cond
) == NE_EXPR
)
502 tree op0
= TREE_OPERAND (cond
, 0);
503 tree op1
= TREE_OPERAND (cond
, 1);
504 if (TREE_CODE (op0
) == SSA_NAME
505 && TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
506 && (integer_zerop (op1
)))
510 gimple_match_op
cexpr (gimple_match_cond::UNCOND
, COND_EXPR
,
511 type
, cond
, rhs
, lhs
);
512 if (cexpr
.resimplify (NULL
, follow_all_ssa_edges
))
514 if (gimple_simplified_result_is_gimple_val (&cexpr
))
516 else if (cexpr
.code
== ABS_EXPR
)
517 return build1 (ABS_EXPR
, type
, cexpr
.ops
[0]);
518 else if (cexpr
.code
== MIN_EXPR
519 || cexpr
.code
== MAX_EXPR
)
520 return build2 ((tree_code
)cexpr
.code
, type
, cexpr
.ops
[0], cexpr
.ops
[1]);
523 return build3 (COND_EXPR
, type
, cond
, rhs
, lhs
);
526 /* Add condition NC to the predicate list of basic block BB. LOOP is
527 the loop to be if-converted. Use predicate of cd-equivalent block
528 for join bb if it exists: we call basic blocks bb1 and bb2
529 cd-equivalent if they are executed under the same condition. */
532 add_to_predicate_list (class loop
*loop
, basic_block bb
, tree nc
)
537 if (is_true_predicate (nc
))
540 /* If dominance tells us this basic block is always executed,
541 don't record any predicates for it. */
542 if (dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
545 dom_bb
= get_immediate_dominator (CDI_DOMINATORS
, bb
);
546 /* We use notion of cd equivalence to get simpler predicate for
547 join block, e.g. if join block has 2 predecessors with predicates
548 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
549 p1 & p2 | p1 & !p2. */
550 if (dom_bb
!= loop
->header
551 && get_immediate_dominator (CDI_POST_DOMINATORS
, dom_bb
) == bb
)
553 gcc_assert (flow_bb_inside_loop_p (loop
, dom_bb
));
554 bc
= bb_predicate (dom_bb
);
555 if (!is_true_predicate (bc
))
556 set_bb_predicate (bb
, bc
);
558 gcc_assert (is_true_predicate (bb_predicate (bb
)));
559 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
560 fprintf (dump_file
, "Use predicate of bb#%d for bb#%d\n",
561 dom_bb
->index
, bb
->index
);
565 if (!is_predicated (bb
))
569 bc
= bb_predicate (bb
);
570 bc
= fold_or_predicates (EXPR_LOCATION (bc
), nc
, bc
);
571 if (is_true_predicate (bc
))
573 reset_bb_predicate (bb
);
578 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
579 if (TREE_CODE (bc
) == TRUTH_NOT_EXPR
)
580 tp
= &TREE_OPERAND (bc
, 0);
583 if (!is_gimple_val (*tp
))
586 *tp
= force_gimple_operand (*tp
, &stmts
, true, NULL_TREE
);
587 add_bb_predicate_gimplified_stmts (bb
, stmts
);
589 set_bb_predicate (bb
, bc
);
592 /* Add the condition COND to the previous condition PREV_COND, and add
593 this to the predicate list of the destination of edge E. LOOP is
594 the loop to be if-converted. */
597 add_to_dst_predicate_list (class loop
*loop
, edge e
,
598 tree prev_cond
, tree cond
)
600 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
603 if (!is_true_predicate (prev_cond
))
604 cond
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
607 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, e
->dest
))
608 add_to_predicate_list (loop
, e
->dest
, cond
);
611 /* Return true if one of the successor edges of BB exits LOOP. */
614 bb_with_exit_edge_p (class loop
*loop
, basic_block bb
)
619 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
620 if (loop_exit_edge_p (loop
, e
))
626 /* Given PHI which has more than two arguments, this function checks if
627 it's if-convertible by degenerating its arguments. Specifically, if
628 below two conditions are satisfied:
630 1) Number of PHI arguments with different values equals to 2 and one
631 argument has the only occurrence.
632 2) The edge corresponding to the unique argument isn't critical edge.
634 Such PHI can be handled as PHIs have only two arguments. For example,
637 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
639 can be transformed into:
641 res = (predicate of e3) ? A_2 : A_1;
643 Return TRUE if it is the case, FALSE otherwise. */
646 phi_convertible_by_degenerating_args (gphi
*phi
)
649 tree arg
, t1
= NULL
, t2
= NULL
;
650 unsigned int i
, i1
= 0, i2
= 0, n1
= 0, n2
= 0;
651 unsigned int num_args
= gimple_phi_num_args (phi
);
653 gcc_assert (num_args
> 2);
655 for (i
= 0; i
< num_args
; i
++)
657 arg
= gimple_phi_arg_def (phi
, i
);
658 if (t1
== NULL
|| operand_equal_p (t1
, arg
, 0))
664 else if (t2
== NULL
|| operand_equal_p (t2
, arg
, 0))
674 if (n1
!= 1 && n2
!= 1)
677 /* Check if the edge corresponding to the unique arg is critical. */
678 e
= gimple_phi_arg_edge (phi
, (n1
== 1) ? i1
: i2
);
679 if (EDGE_COUNT (e
->src
->succs
) > 1)
685 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
686 and it belongs to basic block BB. Note at this point, it is sure
687 that PHI is if-convertible. This function updates global variable
688 ANY_COMPLICATED_PHI if PHI is complicated. */
691 if_convertible_phi_p (class loop
*loop
, basic_block bb
, gphi
*phi
)
693 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
695 fprintf (dump_file
, "-------------------------\n");
696 print_gimple_stmt (dump_file
, phi
, 0, TDF_SLIM
);
699 if (bb
!= loop
->header
700 && gimple_phi_num_args (phi
) > 2
701 && !phi_convertible_by_degenerating_args (phi
))
702 any_complicated_phi
= true;
707 /* Records the status of a data reference. This struct is attached to
708 each DR->aux field. */
711 bool rw_unconditionally
;
712 bool w_unconditionally
;
713 bool written_at_least_once
;
717 tree base_w_predicate
;
720 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
721 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
722 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
723 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
725 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
726 HASH tables. While storing them in HASH table, it checks if the
727 reference is unconditionally read or written and stores that as a flag
728 information. For base reference it checks if it is written atlest once
729 unconditionally and stores it as flag information along with DR.
730 In other words for every data reference A in STMT there exist other
731 accesses to a data reference with the same base with predicates that
732 add up (OR-up) to the true predicate: this ensures that the data
733 reference A is touched (read or written) on every iteration of the
734 if-converted loop. */
736 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a
)
739 data_reference_p
*master_dr
, *base_master_dr
;
740 tree base_ref
= DR_BASE_OBJECT (a
);
741 innermost_loop_behavior
*innermost
= &DR_INNERMOST (a
);
742 tree ca
= bb_predicate (gimple_bb (DR_STMT (a
)));
745 master_dr
= &innermost_DR_map
->get_or_insert (innermost
, &exist1
);
751 IFC_DR (*master_dr
)->w_predicate
752 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
753 IFC_DR (*master_dr
)->w_predicate
);
754 if (is_true_predicate (IFC_DR (*master_dr
)->w_predicate
))
755 DR_W_UNCONDITIONALLY (*master_dr
) = true;
757 IFC_DR (*master_dr
)->rw_predicate
758 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
759 IFC_DR (*master_dr
)->rw_predicate
);
760 if (is_true_predicate (IFC_DR (*master_dr
)->rw_predicate
))
761 DR_RW_UNCONDITIONALLY (*master_dr
) = true;
765 base_master_dr
= &baseref_DR_map
->get_or_insert (base_ref
, &exist2
);
768 IFC_DR (*base_master_dr
)->base_w_predicate
769 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
770 IFC_DR (*base_master_dr
)->base_w_predicate
);
771 if (is_true_predicate (IFC_DR (*base_master_dr
)->base_w_predicate
))
772 DR_BASE_W_UNCONDITIONALLY (*base_master_dr
) = true;
776 /* Return TRUE if can prove the index IDX of an array reference REF is
777 within array bound. Return false otherwise. */
780 idx_within_array_bound (tree ref
, tree
*idx
, void *dta
)
782 wi::overflow_type overflow
;
783 widest_int niter
, valid_niter
, delta
, wi_step
;
786 class loop
*loop
= (class loop
*) dta
;
788 /* Only support within-bound access for array references. */
789 if (TREE_CODE (ref
) != ARRAY_REF
)
792 /* For arrays that might have flexible sizes, it is not guaranteed that they
793 do not extend over their declared size. */
794 if (array_ref_flexible_size_p (ref
))
797 ev
= analyze_scalar_evolution (loop
, *idx
);
798 ev
= instantiate_parameters (loop
, ev
);
799 init
= initial_condition (ev
);
800 step
= evolution_part_in_loop_num (ev
, loop
->num
);
802 if (!init
|| TREE_CODE (init
) != INTEGER_CST
803 || (step
&& TREE_CODE (step
) != INTEGER_CST
))
806 low
= array_ref_low_bound (ref
);
807 high
= array_ref_up_bound (ref
);
809 /* The case of nonconstant bounds could be handled, but it would be
811 if (TREE_CODE (low
) != INTEGER_CST
812 || !high
|| TREE_CODE (high
) != INTEGER_CST
)
815 /* Check if the intial idx is within bound. */
816 if (wi::to_widest (init
) < wi::to_widest (low
)
817 || wi::to_widest (init
) > wi::to_widest (high
))
820 /* The idx is always within bound. */
821 if (!step
|| integer_zerop (step
))
824 if (!max_loop_iterations (loop
, &niter
))
827 if (wi::to_widest (step
) < 0)
829 delta
= wi::to_widest (init
) - wi::to_widest (low
);
830 wi_step
= -wi::to_widest (step
);
834 delta
= wi::to_widest (high
) - wi::to_widest (init
);
835 wi_step
= wi::to_widest (step
);
838 valid_niter
= wi::div_floor (delta
, wi_step
, SIGNED
, &overflow
);
839 /* The iteration space of idx is within array bound. */
840 if (!overflow
&& niter
<= valid_niter
)
846 /* Return TRUE if ref is a within bound array reference. */
849 ref_within_array_bound (gimple
*stmt
, tree ref
)
851 class loop
*loop
= loop_containing_stmt (stmt
);
853 gcc_assert (loop
!= NULL
);
854 return for_each_index (&ref
, idx_within_array_bound
, loop
);
858 /* Given a memory reference expression T, return TRUE if base object
859 it refers to is writable. The base object of a memory reference
860 is the main object being referenced, which is returned by function
864 base_object_writable (tree ref
)
866 tree base_tree
= get_base_address (ref
);
869 && DECL_P (base_tree
)
870 && decl_binds_to_current_def_p (base_tree
)
871 && !TREE_READONLY (base_tree
));
874 /* Return true when the memory references of STMT won't trap in the
875 if-converted code. There are two things that we have to check for:
877 - writes to memory occur to writable memory: if-conversion of
878 memory writes transforms the conditional memory writes into
879 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
880 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
881 be executed at all in the original code, it may be a readonly
882 memory. To check that A is not const-qualified, we check that
883 there exists at least an unconditional write to A in the current
886 - reads or writes to memory are valid memory accesses for every
887 iteration. To check that the memory accesses are correctly formed
888 and that we are allowed to read and write in these locations, we
889 check that the memory accesses to be if-converted occur at every
890 iteration unconditionally.
892 Returns true for the memory reference in STMT, same memory reference
893 is read or written unconditionally atleast once and the base memory
894 reference is written unconditionally once. This is to check reference
895 will not write fault. Also retuns true if the memory reference is
896 unconditionally read once then we are conditionally writing to memory
897 which is defined as read and write and is bound to the definition
900 ifcvt_memrefs_wont_trap (gimple
*stmt
, vec
<data_reference_p
> drs
)
902 /* If DR didn't see a reference here we can't use it to tell
903 whether the ref traps or not. */
904 if (gimple_uid (stmt
) == 0)
907 data_reference_p
*master_dr
, *base_master_dr
;
908 data_reference_p a
= drs
[gimple_uid (stmt
) - 1];
910 tree base
= DR_BASE_OBJECT (a
);
911 innermost_loop_behavior
*innermost
= &DR_INNERMOST (a
);
913 gcc_assert (DR_STMT (a
) == stmt
);
914 gcc_assert (DR_BASE_ADDRESS (a
) || DR_OFFSET (a
)
915 || DR_INIT (a
) || DR_STEP (a
));
917 master_dr
= innermost_DR_map
->get (innermost
);
918 gcc_assert (master_dr
!= NULL
);
920 base_master_dr
= baseref_DR_map
->get (base
);
922 /* If a is unconditionally written to it doesn't trap. */
923 if (DR_W_UNCONDITIONALLY (*master_dr
))
926 /* If a is unconditionally accessed then ...
928 Even a is conditional access, we can treat it as an unconditional
929 one if it's an array reference and all its index are within array
931 if (DR_RW_UNCONDITIONALLY (*master_dr
)
932 || ref_within_array_bound (stmt
, DR_REF (a
)))
934 /* an unconditional read won't trap. */
938 /* an unconditionaly write won't trap if the base is written
939 to unconditionally. */
941 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr
))
942 return flag_store_data_races
;
943 /* or the base is known to be not readonly. */
944 else if (base_object_writable (DR_REF (a
)))
945 return flag_store_data_races
;
951 /* Return true if STMT could be converted into a masked load or store
952 (conditional load or store based on a mask computed from bb predicate). */
955 ifcvt_can_use_mask_load_store (gimple
*stmt
)
957 /* Check whether this is a load or store. */
958 tree lhs
= gimple_assign_lhs (stmt
);
961 if (gimple_store_p (stmt
))
963 if (!is_gimple_val (gimple_assign_rhs1 (stmt
)))
968 else if (gimple_assign_load_p (stmt
))
971 ref
= gimple_assign_rhs1 (stmt
);
976 if (may_be_nonaddressable_p (ref
))
979 /* Mask should be integer mode of the same size as the load/store
981 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
982 if (!int_mode_for_mode (mode
).exists () || VECTOR_MODE_P (mode
))
985 if (can_vec_mask_load_store_p (mode
, VOIDmode
, is_load
))
991 /* Return true if STMT could be converted from an operation that is
992 unconditional to one that is conditional on a bb predicate mask. */
995 ifcvt_can_predicate (gimple
*stmt
)
997 basic_block bb
= gimple_bb (stmt
);
999 if (!(flag_tree_loop_vectorize
|| bb
->loop_father
->force_vectorize
)
1000 || bb
->loop_father
->dont_vectorize
1001 || gimple_has_volatile_ops (stmt
))
1004 if (gimple_assign_single_p (stmt
))
1005 return ifcvt_can_use_mask_load_store (stmt
);
1007 tree_code code
= gimple_assign_rhs_code (stmt
);
1008 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1009 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
1010 if (!types_compatible_p (lhs_type
, rhs_type
))
1012 internal_fn cond_fn
= get_conditional_internal_fn (code
);
1013 return (cond_fn
!= IFN_LAST
1014 && vectorized_internal_fn_supported_p (cond_fn
, lhs_type
));
1017 /* Return true when STMT is if-convertible.
1019 GIMPLE_ASSIGN statement is not if-convertible if,
1020 - it is not movable,
1022 - LHS is not var decl. */
1025 if_convertible_gimple_assign_stmt_p (gimple
*stmt
,
1026 vec
<data_reference_p
> refs
)
1028 tree lhs
= gimple_assign_lhs (stmt
);
1030 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1032 fprintf (dump_file
, "-------------------------\n");
1033 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
1036 if (!is_gimple_reg_type (TREE_TYPE (lhs
)))
1039 /* Some of these constrains might be too conservative. */
1040 if (stmt_ends_bb_p (stmt
)
1041 || gimple_has_volatile_ops (stmt
)
1042 || (TREE_CODE (lhs
) == SSA_NAME
1043 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
1044 || gimple_has_side_effects (stmt
))
1046 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1047 fprintf (dump_file
, "stmt not suitable for ifcvt\n");
1051 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1052 in between if_convertible_loop_p and combine_blocks
1053 we can perform loop versioning. */
1054 gimple_set_plf (stmt
, GF_PLF_2
, false);
1056 if ((! gimple_vuse (stmt
)
1057 || gimple_could_trap_p_1 (stmt
, false, false)
1058 || ! ifcvt_memrefs_wont_trap (stmt
, refs
))
1059 && gimple_could_trap_p (stmt
))
1061 if (ifcvt_can_predicate (stmt
))
1063 gimple_set_plf (stmt
, GF_PLF_2
, true);
1064 need_to_predicate
= true;
1067 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1068 fprintf (dump_file
, "tree could trap...\n");
1071 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
1072 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
1073 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
))
1074 && arith_code_with_undefined_signed_overflow
1075 (gimple_assign_rhs_code (stmt
)))
1076 /* We have to rewrite stmts with undefined overflow. */
1077 need_to_rewrite_undefined
= true;
1079 /* When if-converting stores force versioning, likewise if we
1080 ended up generating store data races. */
1081 if (gimple_vdef (stmt
))
1082 need_to_predicate
= true;
1087 /* Return true when STMT is if-convertible.
1089 A statement is if-convertible if:
1090 - it is an if-convertible GIMPLE_ASSIGN,
1091 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1092 - it is builtins call,
1093 - it is a call to a function with a SIMD clone. */
1096 if_convertible_stmt_p (gimple
*stmt
, vec
<data_reference_p
> refs
)
1098 switch (gimple_code (stmt
))
1106 return if_convertible_gimple_assign_stmt_p (stmt
, refs
);
1110 tree fndecl
= gimple_call_fndecl (stmt
);
1113 /* We can vectorize some builtins and functions with SIMD
1114 "inbranch" clones. */
1115 int flags
= gimple_call_flags (stmt
);
1116 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
1117 if ((flags
& ECF_CONST
)
1118 && !(flags
& ECF_LOOPING_CONST_OR_PURE
)
1119 && fndecl_built_in_p (fndecl
))
1121 if (node
&& node
->simd_clones
!= NULL
)
1122 /* Ensure that at least one clone can be "inbranch". */
1123 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
1124 n
= n
->simdclone
->next_clone
)
1125 if (n
->simdclone
->inbranch
)
1127 gimple_set_plf (stmt
, GF_PLF_2
, true);
1128 need_to_predicate
= true;
1136 /* Don't know what to do with 'em so don't do anything. */
1137 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1139 fprintf (dump_file
, "don't know what to do\n");
1140 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
1146 /* Assumes that BB has more than 1 predecessors.
1147 Returns false if at least one successor is not on critical edge
1148 and true otherwise. */
1151 all_preds_critical_p (basic_block bb
)
1156 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1157 if (EDGE_COUNT (e
->src
->succs
) == 1)
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  EXIT_BB may be NULL if no exit block was seen yet.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* More than two successors cannot be expressed as a single
     condition/negation pair.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* A control-altering call (e.g. noreturn, setjmp-like) ends the block
     in a way predication cannot model.  */
  if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
    if (gimple_call_ctrl_altering_p (call))
      return false;

  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "latch is not dominated by exit_block\n");
	  return false;
	}
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
/* Return true when all predecessor blocks of BB are visited.  The
   VISITED bitmap keeps track of the visited blocks.  */

static bool
pred_blocks_visited_p (basic_block bb, bitmap *visited)
{
  edge e;
  edge_iterator ei;

  /* A single unvisited predecessor disqualifies BB from being
     selected next in the if-conversion order.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!bitmap_bit_p (*visited, e->src->index))
      return false;

  return true;
}
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.

   Returns NULL (after freeing intermediate storage) when a block of
   the loop belongs to an irreducible region, since such loops are not
   if-converted.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  /* Repeatedly sweep the BFS order, picking up blocks whose
     predecessors have all been selected, until every block is
     placed.  */
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order[index];

      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  /* Irreducible regions cannot be if-converted: give up.  */
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      /* Some blocks were deferred because of unvisited predecessors:
	 restart the sweep.  */
      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	index = 0;
    }

  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
/* Computes the predicates for all the basic blocks in LOOP.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  tree c;
	  /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
	     conditions can remain unfolded because of multiple uses so
	     try to re-fold here, especially to get precision changing
	     conversions sorted out.  Do not simply fold the stmt since
	     this is analysis only.  When conditions were embedded in
	     COND_EXPRs those were folded separately before folding the
	     COND_EXPR but as they are now outside we have to make sure
	     to fold them.  Do it here - another opportunity would be to
	     fold predicates as they are inserted.  */
	  gimple_match_op cexpr (gimple_match_cond::UNCOND,
				 gimple_cond_code (stmt),
				 boolean_type_node,
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt));
	  if (cexpr.resimplify (NULL, follow_all_ssa_edges)
	      && cexpr.code.is_tree_code ()
	      && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
	    c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
			    cexpr.ops[0], cexpr.ops[1]);
	  else
	    c = build2_loc (loc, gimple_cond_code (stmt),
			    boolean_type_node,
			    gimple_cond_lhs (stmt),
			    gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
/* Build region by adding loop pre-header and post-header blocks.
   The returned vector is [pre-header, loop blocks..., post-header];
   the caller must release it.  */

static vec<basic_block>
build_region (class loop *loop)
{
  vec<basic_block> region = vNULL;
  basic_block exit_bb = NULL;

  gcc_assert (ifc_bbs);
  /* The first element is loop pre-header.  */
  region.safe_push (loop_preheader_edge (loop)->src);

  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      region.safe_push (bb);
      /* Find loop postheader.  */
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (loop_exit_edge_p (loop, e))
	  {
	    exit_bb = e->dest;
	    break;
	  }
    }

  /* The last element is loop post-header.  */
  gcc_assert (exit_bb);
  region.safe_push (exit_bb);
  return region;
}
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  calculate_dominance_info (CDI_DOMINATORS);

  /* First pass: structural check of every block; remember the block
     carrying the loop exit edge since blocks after it are further
     restricted.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Second pass: reject nonlocal/forced labels and clear statement
     uids (they are reused below to number data references).  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      bool may_have_nonlocal_labels
	= bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	    if (!may_have_nonlocal_labels)
	      {
		tree label
		  = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
		if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
		  return false;
	      }
	    /* Fallthru.  */
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  innermost_DR_map
    = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      /* Number the stmt via its uid so predication can map back to the
	 data reference; 0 means "no DR".  */
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.  */

static bool
if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
{
  edge e;
  edge_iterator ei;
  bool res = false;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  res = if_convertible_loop_p_1 (loop, refs);

  /* The maps were allocated in if_convertible_loop_p_1; release them
     regardless of the verdict.  */
  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}
/* Return reduc_1 if has_nop.

   if (...)
     tmp1 = (unsigned type) reduc_1;
     tmp2 = tmp1 + rhs2;
     reduc_3 = (signed type) tmp2.

   When HAS_NOP is false, OP is returned unchanged.  When it is true,
   OP must be an SSA name defined by a nop conversion; the conversion
   source is returned, or NULL_TREE if the pattern does not match.  */

static tree
strip_nop_cond_scalar_reduction (bool has_nop, tree op)
{
  if (!has_nop)
    return op;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
  if (!stmt
      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
      || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
				 (gimple_assign_rhs1 (stmt))))
    return NULL_TREE;

  return gimple_assign_rhs1 (stmt);
}
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop_header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.
   *HAS_NOP is set when the reduction is wrapped in nop conversions and
   *NOP_REDUC then receives the outer conversion statement.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended, bool* has_nop,
			  gimple **nop_reduc)
{
  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  class loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = *has_nop = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* One argument must come from the loop-header PHI (the running
     reduction value), the other from the predicated update.  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;

  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* The result of PHI must feed back into the header PHI over the
     latch edge for this to be a reduction cycle.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);

  /* Catch something like below

     loop-header:
     reduc_1 = PHI <..., reduc_2>
     ...
     if (...)
     tmp1 = (unsigned type) reduc_1;
     tmp2 = tmp1 + rhs2;
     reduc_3 = (signed type) tmp2;

     reduc_2 = PHI <reduc_1, reduc_3>

     and convert to

     reduc_2 = PHI <0, reduc_1>
     tmp1 = (unsigned type)reduc_1;
     ifcvt = cond_expr ? rhs2 : 0
     tmp2 = tmp1 +/- ifcvt;
     reduc_1 = (signed type)tmp2;  */

  if (CONVERT_EXPR_CODE_P (reduction_op))
    {
      lhs = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (lhs) != SSA_NAME
	  || !has_single_use (lhs))
	return false;

      *nop_reduc = stmt;
      stmt = SSA_NAME_DEF_STMT (lhs);
      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
	  || !is_gimple_assign (stmt))
	return false;

      *has_nop = true;
      reduction_op = gimple_assign_rhs_code (stmt);
    }

  if (reduction_op != PLUS_EXPR
      && reduction_op != MINUS_EXPR
      && reduction_op != MULT_EXPR
      && reduction_op != BIT_IOR_EXPR
      && reduction_op != BIT_XOR_EXPR
      && reduction_op != BIT_AND_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_nop2 == PHI_RESULT (header_phi)
      && commutative_tree_code (reduction_op))
    {
      std::swap (r_op1, r_op2);
      std::swap (r_nop1, r_nop2);
    }
  else if (r_nop1 != PHI_RESULT (header_phi))
    return false;

  if (*has_nop)
    {
      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
	    continue;
	  if (use_stmt != phi)
	    return false;
	}
    }

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  If HAS_NOP, the reduction is wrapped in nop conversions and NOP_REDUC
  (the outer conversion) is removed after being folded in.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap,
			       bool has_nop, gimple* nop_reduc)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
  /* The neutral element leaves the reduction value unchanged on the
     not-taken path (0 for +/-, 1 for *, etc.).  */
  tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
					       NULL);
  gimple_seq stmts = NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? op_nochange : op1,
			    swap ? op1 : op_nochange);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement/logic_operation.  */
  rhs = gimple_build (&stmts, reduction_op,
		      TREE_TYPE (rhs1), op0, tmp);

  if (has_nop)
    {
      /* Re-apply the outer nop conversion, then drop the now-dead
	 conversion statement.  */
      rhs = gimple_convert (&stmts,
			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
      stmt_it = gsi_for_stmt (nop_reduc);
      gsi_remove (&stmt_it, true);
      release_defs (nop_reduc);
    }
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
1898 /* Generate a simplified conditional. */
1901 gen_simplified_condition (tree cond
, scalar_cond_masked_set_type
&cond_set
)
1903 /* Check if the value is already live in a previous branch. This resolves
1904 nested conditionals from diamond PHI reductions. */
1905 if (TREE_CODE (cond
) == SSA_NAME
)
1907 gimple
*stmt
= SSA_NAME_DEF_STMT (cond
);
1908 gassign
*assign
= NULL
;
1909 if ((assign
= as_a
<gassign
*> (stmt
))
1910 && gimple_assign_rhs_code (assign
) == BIT_AND_EXPR
)
1912 tree arg1
= gimple_assign_rhs1 (assign
);
1913 tree arg2
= gimple_assign_rhs2 (assign
);
1914 if (cond_set
.contains ({ arg1
, 1 }))
1915 arg1
= boolean_true_node
;
1917 arg1
= gen_simplified_condition (arg1
, cond_set
);
1919 if (cond_set
.contains ({ arg2
, 1 }))
1920 arg2
= boolean_true_node
;
1922 arg2
= gen_simplified_condition (arg2
, cond_set
);
1924 cond
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
, arg1
, arg2
);
/* Produce condition for all occurrences of ARG in PHI node.  Set *INVERT
   as to whether the condition is inverted.

   OCCUR holds the PHI argument indices that share the same value; the
   result is the OR of the predicates of their incoming blocks,
   gimplified at GSI.  Each emitted condition is registered in COND_SET
   so later arguments can be simplified against it.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur, gimple_stmt_iterator *gsi,
		       scalar_cond_masked_set_type &cond_set, bool *invert)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  *invert = false;
  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      if (is_true_predicate (c))
	{
	  /* An always-true predicate subsumes the whole OR chain.  */
	  cond = c;
	  break;
	}
      /* If we have just a single inverted predicate, signal that and
	 instead invert the COND_EXPR arms.  */
      if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
	{
	  c = TREE_OPERAND (c, 0);
	  *invert = true;
	}

      c = gen_simplified_condition (c, cond_set);
      c = force_gimple_operand_gsi (gsi, unshare_expr (c),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
					   NULL_TREE, true, GSI_SAME_STMT);
	}
      else
	cond = c;

      /* Register the new possibly simplified conditional.  When more than 2
	 entries in a phi node we chain entries in the false branch, so the
	 inverted condition is active.  */
      scalar_cond_masked_key pred_cond ({ cond, 1 });
      if (!*invert)
	pred_cond.inverted_p = !pred_cond.inverted_p;
      cond_set.add (pred_cond);
    }

  gcc_assert (cond != NULL_TREE);
  return cond;
}
/* Create the smallest nested conditional possible.  On pre-order we record
   which conditionals are live, and on post-order rewrite the chain by removing
   already active conditions.

   As an example we simplify:

  _7 = a_10 < 0;
  _21 = a_10 >= 0;
  _22 = a_10 < e_11(D);
  _23 = _21 & _22;
  _ifc__42 = _23 ? t_13 : 0;
  t_6 = _7 ? 1 : _ifc__42

  into

  _7 = a_10 < 0;
  _22 = a_10 < e_11(D);
  _ifc__42 = _22 ? t_13 : 0;
  t_6 = _7 ? 1 : _ifc__42;

  which produces better code.

  Recurses over ARGS from IDX; returns the SSA name holding the value
  selected for args[IDX-1..], and stores the last generated assignment
  in *RES_STMT.  LHS0 is used as the lhs of the outermost (IDX == 1)
  assignment.  */

static tree
gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
			scalar_cond_masked_set_type &cond_set, tree type,
			hash_map<tree_operand_hash, auto_vec<int>> &phi_arg_map,
			gimple **res_stmt, tree lhs0, vec<tree> &args,
			unsigned idx)
{
  /* Base case: the innermost value needs no condition of its own.  */
  if (idx == args.length ())
    return args[idx - 1];

  vec<int> *indexes = phi_arg_map.get (args[idx - 1]);
  bool invert;
  tree cond = gen_phi_arg_condition (phi, indexes, gsi, cond_set, &invert);
  tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map,
				      res_stmt, lhs0, args, idx + 1);

  unsigned prev = idx;
  unsigned curr = prev - 1;
  tree arg0 = args[curr];
  tree rhs, lhs;
  if (idx > 1)
    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
  else
    /* The outermost COND_EXPR defines the PHI's replacement value.  */
    lhs = lhs0;

  if (!invert)
    rhs = fold_build_cond_expr (type, unshare_expr (cond),
				arg0, arg1);
  else
    /* A single inverted predicate: swap the COND_EXPR arms instead of
       negating the condition.  */
    rhs = fold_build_cond_expr (type, unshare_expr (cond),
				arg1, arg0);
  gassign *new_stmt = gimple_build_assign (lhs, rhs);
  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
  update_stmt (new_stmt);
  *res_stmt = new_stmt;
  return lhs;
}
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
   This routine can handle PHI nodes with more than two arguments.

   For example,
     S1: A = PHI <x1(1), x2(5)>
   is converted into,
     S2: A = cond ? x1 : x2;

   The generated code is inserted at GSI that points to the top of
   basic block's statement list.
   If PHI node has more than two arguments a chain of conditional
   expression is produced.  */

static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
  gimple *new_stmt = NULL, *reduc, *nop_reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  edge e;
  basic_block bb;
  unsigned int i;
  bool has_nop;

  res = gimple_phi_result (phi);
  /* Virtual PHIs carry memory state, not values: nothing to do.  */
  if (virtual_operand_p (res))
    return;

  /* A PHI whose arguments all agree (or whose evolution is fully
     known) degenerates to a plain copy.  */
  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
					    res))
	  && !chrec_contains_undetermined (scev)
	  && scev != res
	  && (rhs = gimple_phi_arg_def (phi, 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Degenerate phi!\n");
	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
      return;
    }

  bb = gimple_bb (phi);
  /* Keep track of conditionals already seen.  */
  scalar_cond_masked_set_type cond_set;
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (first_edge->src);
      cond_set.add ({ cond, 1 });
      /* Prefer the non-negated predicate as the COND_EXPR condition.  */
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	std::swap (first_edge, second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
	{
	  cond = bb_predicate (second_edge->src);
	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	    cond = TREE_OPERAND (cond, 0);
	  else
	    first_edge = second_edge;
	}
      else
	cond = bb_predicate (first_edge->src);

      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = gen_simplified_condition (cond, cond_set);
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      /* Order the PHI arguments so ARG0 corresponds to the true arm.  */
      if (EDGE_PRED (bb, 1)->src == true_bb)
	{
	  arg0 = gimple_phi_arg_def (phi, 1);
	  arg1 = gimple_phi_arg_def (phi, 0);
	}
      else
	{
	  arg0 = gimple_phi_arg_def (phi, 0);
	  arg1 = gimple_phi_arg_def (phi, 1);
	}
      if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				    &op0, &op1, false, &has_nop,
				    &nop_reduc))
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       true_bb != gimple_bb (reduc),
					       has_nop, nop_reduc);
	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
	}
      else
	/* Build new RHS using selected condition and arguments.  */
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    arg0, arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, follow_all_ssa_edges))
	{
	  new_stmt = gsi_stmt (new_gsi);
	  update_stmt (new_stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "new phi replacement stmt\n");
	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
	}
      return;
    }

  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (phi);
  /* Vector of different PHI argument values.  */
  auto_vec<tree> args (num_args);

  /* Compute phi_arg_map.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (phi, i);
      if (!phi_arg_map.get (arg))
	args.quick_push (arg);
      phi_arg_map.get_or_insert (arg).safe_push (i);
    }

  /* Determine element with max number of occurrences and complexity.  Looking at only
     number of occurrences as a measure for complexity isn't enough as all usages can
     be unique but the comparisons to reach the PHI node differ per branch.  */
  typedef std::pair <tree, std::pair <unsigned, unsigned>> ArgEntry;
  auto_vec <ArgEntry> argsKV;
  for (i = 0; i < args.length (); i++)
    {
      unsigned int len = 0;
      for (int index : phi_arg_map.get (args[i]))
	{
	  edge e = gimple_phi_arg_edge (phi, index);
	  len += get_bb_num_predicate_stmts (e->src);
	}

      unsigned occur = phi_arg_map.get (args[i])->length ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
      argsKV.safe_push ({ args[i], { len, occur }});
    }

  /* Sort elements based on rankings ARGS.  */
  std::sort(argsKV.begin(), argsKV.end(), [](const ArgEntry &left,
					     const ArgEntry &right) {
    return left.second < right.second;
  });

  for (i = 0; i < args.length (); i++)
    args[i] = argsKV[i].first;

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args.length () == 2 && phi_arg_map.get (args[0])->length () == 1)
    {
      vec<int> *indexes;
      indexes = phi_arg_map.get (args[0]);
      index0 = (*indexes)[0];
      arg0 = args[0];
      arg1 = args[1];
      e = gimple_phi_arg_edge (phi, index0);
      cond = bb_predicate (e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				      &op0, &op1, true, &has_nop, &nop_reduc)))
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    swap ? arg1 : arg0,
				    swap ? arg0 : arg1);
      else
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       swap,has_nop, nop_reduc);
	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
    }
  else
    {
      /* Common case: build a chain of nested COND_EXPRs.  */
      tree type = TREE_TYPE (gimple_phi_result (phi));
      gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map,
			      &new_stmt, res, args, 1);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "new extended phi replacement stmt\n");
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
   LOOP->header block with conditional modify expressions.  */

static void
predicate_all_scalar_phis (class loop *loop)
{
  basic_block bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;

  /* Start at 1: ifc_bbs[0] is the loop header, whose PHIs stay.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gphi *phi;
      gimple_stmt_iterator gsi;
      gphi_iterator phi_gsi;
      bb = ifc_bbs[i];

      if (bb == loop->header)
	continue;

      phi_gsi = gsi_start_phis (bb);
      if (gsi_end_p (phi_gsi))
	continue;

      /* Replacement COND_EXPRs are inserted right after the labels.  */
      gsi = gsi_after_labels (bb);
      while (!gsi_end_p (phi_gsi))
	{
	  phi = phi_gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    /* Keep virtual PHIs; they are handled elsewhere.  */
	    gsi_next (&phi_gsi);
	  else
	    {
	      predicate_scalar_phi (phi, &gsi);
	      /* remove_phi_node advances PHI_GSI to the next PHI.  */
	      remove_phi_node (&phi_gsi, false);
	    }
	}
    }
}
2305 /* Insert in each basic block of LOOP the statements produced by the
2306 gimplification of the predicates. */
2309 insert_gimplified_predicates (loop_p loop
)
2313 for (i
= 0; i
< loop
->num_nodes
; i
++)
2315 basic_block bb
= ifc_bbs
[i
];
2317 if (!is_predicated (bb
))
2318 gcc_assert (bb_predicate_gimplified_stmts (bb
) == NULL
);
2319 if (!is_predicated (bb
))
2321 /* Do not insert statements for a basic block that is not
2322 predicated. Also make sure that the predicate of the
2323 basic block is set to true. */
2324 reset_bb_predicate (bb
);
2328 stmts
= bb_predicate_gimplified_stmts (bb
);
2331 if (need_to_predicate
)
2333 /* Insert the predicate of the BB just after the label,
2334 as the if-conversion of memory writes will use this
2336 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
2337 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2341 /* Insert the predicate of the BB at the end of the BB
2342 as this would reduce the register pressure: the only
2343 use of this predicate will be in successor BBs. */
2344 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2347 || stmt_ends_bb_p (gsi_stmt (gsi
)))
2348 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2350 gsi_insert_seq_after (&gsi
, stmts
, GSI_SAME_STMT
);
2353 /* Once the sequence is code generated, set it to NULL. */
2354 set_bb_predicate_gimplified_stmts (bb
, NULL
, true);
/* Helper function for predicate_statements.  Returns index of existent
   mask if it was created for given SIZE and -1 otherwise.  */

static int
mask_exists (int size, const vec<int> &vec)
{
  unsigned int ix;
  int v;
  /* Linear scan: the vector of mask sizes is tiny.  */
  FOR_EACH_VEC_ELT (vec, ix, v)
    if (v == size)
      return (int) ix;
  return -1;
}
/* Helper function for predicate_statements.  STMT is a memory read or
   write and it needs to be predicated by MASK.  Return a statement
   that replaces it: an IFN_MASK_LOAD when the lhs is an SSA name (a
   read), otherwise an IFN_MASK_STORE.  */

static gimple *
predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
{
  gcall *new_stmt;

  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  /* The memory reference is on the non-SSA side of the assignment.  */
  tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
  mark_addressable (ref);
  tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
					true, NULL_TREE, true, GSI_SAME_STMT);
  /* PTR encodes the alias pointer type and the known alignment of the
     reference, as expected by the masked load/store internal fns.  */
  tree ptr = build_int_cst (reference_alias_ptr_type (ref),
			    get_object_alignment (ref));
  /* Copy points-to info if possible.  */
  if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
    copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
		   ref);
  if (TREE_CODE (lhs) == SSA_NAME)
    {
      /* Masked load: lhs = IFN_MASK_LOAD (addr, ptr, mask).  */
      new_stmt
	= gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
				      ptr, mask);
      gimple_call_set_lhs (new_stmt, lhs);
      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
    }
  else
    {
      /* Masked store: IFN_MASK_STORE (addr, ptr, mask, rhs).  */
      new_stmt
	= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
				      mask, rhs);
      gimple_move_vops (new_stmt, stmt);
    }
  gimple_call_set_nothrow (new_stmt, true);
  return new_stmt;
}
/* STMT uses OP_LHS.  Check whether it is equivalent to:

     ... = OP_MASK ? OP_LHS : X;

   Return X if so, otherwise return null.  OP_MASK is an SSA_NAME that is
   known to have value OP_COND.  */

static tree
check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
			   tree op_lhs)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
    return NULL_TREE;

  tree use_cond = gimple_assign_rhs1 (assign);
  tree if_true = gimple_assign_rhs2 (assign);
  tree if_false = gimple_assign_rhs3 (assign);

  /* Same (or equal) condition selecting OP_LHS on the true arm.  */
  if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
      && if_true == op_lhs)
    return if_false;

  /* Inverted condition selecting OP_LHS on the false arm.  */
  if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
    return if_true;

  return NULL_TREE;
}
2442 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2443 the set of SSA names defined earlier in STMT's block. */
/* NOTE(review): lossy extraction -- the return type, the VALUE parameter
   line and some early returns are not visible in this chunk.  */
2446 value_available_p (gimple
*stmt
, hash_set
<tree_ssa_name_hash
> *ssa_names
,
/* Constants are available everywhere.  */
2449 if (is_gimple_min_invariant (value
))
2452 if (TREE_CODE (value
) == SSA_NAME
)
/* Default defs are available on function entry.  */
2454 if (SSA_NAME_IS_DEFAULT_DEF (value
))
/* Same block: must have been seen earlier (tracked in SSA_NAMES);
   otherwise the defining block must dominate the use block.  */
2457 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (value
));
2458 basic_block use_bb
= gimple_bb (stmt
);
2459 return (def_bb
== use_bb
2460 ? ssa_names
->contains (value
)
2461 : dominated_by_p (CDI_DOMINATORS
, use_bb
, def_bb
));
2467 /* Helper function for predicate_statements. STMT is a potentially-trapping
2468 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2469 has value COND. Return a statement that does so. SSA_NAMES is the set of
2470 SSA names defined earlier in STMT's block. */
/* NOTE(review): lossy extraction -- the return type, some declarations
   (e.g. USE_STMT) and the final return are not visible in this chunk.  */
2473 predicate_rhs_code (gassign
*stmt
, tree mask
, tree cond
,
2474 hash_set
<tree_ssa_name_hash
> *ssa_names
)
2476 tree lhs
= gimple_assign_lhs (stmt
);
2477 tree_code code
= gimple_assign_rhs_code (stmt
);
2478 unsigned int nops
= gimple_num_ops (stmt
);
/* Map e.g. PLUS_EXPR to IFN_COND_ADD.  */
2479 internal_fn cond_fn
= get_conditional_internal_fn (code
);
2481 /* Construct the arguments to the conditional internal function. */
2482 auto_vec
<tree
, 8> args
;
2483 args
.safe_grow (nops
+ 1, true);
2485 for (unsigned int i
= 1; i
< nops
; ++i
)
2486 args
[i
] = gimple_op (stmt
, i
);
/* Last slot is the "else" value, filled in below.  */
2487 args
[nops
] = NULL_TREE
;
2489 /* Look for uses of the result to see whether they are COND_EXPRs that can
2490 be folded into the conditional call. */
2491 imm_use_iterator imm_iter
;
2493 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, lhs
)
2495 tree new_else
= check_redundant_cond_expr (use_stmt
, mask
, cond
, lhs
);
2496 if (new_else
&& value_available_p (stmt
, ssa_names
, new_else
))
2499 args
[nops
] = new_else
;
2500 if (operand_equal_p (new_else
, args
[nops
], 0))
2504 LHS = IFN_COND (MASK, ..., ELSE);
2505 X = MASK ? LHS : ELSE;
2507 which makes X equivalent to LHS. */
/* Record X -> LHS so the redundant COND_EXPR can be replaced later.  */
2508 tree use_lhs
= gimple_assign_lhs (use_stmt
);
2509 redundant_ssa_names
.safe_push (std::make_pair (use_lhs
, lhs
));
/* No foldable use found: ask the target for its preferred else value.  */
2514 args
[nops
] = targetm
.preferred_else_value (cond_fn
, TREE_TYPE (lhs
),
2515 nops
- 1, &args
[1]);
2517 /* Create and insert the call. */
2518 gcall
*new_stmt
= gimple_build_call_internal_vec (cond_fn
, args
);
2519 gimple_call_set_lhs (new_stmt
, lhs
);
2520 gimple_call_set_nothrow (new_stmt
, true);
2525 /* Predicate each write to memory in LOOP.
2527 This function transforms control flow constructs containing memory
2530 | for (i = 0; i < N; i++)
2534 into the following form that does not contain control flow:
2536 | for (i = 0; i < N; i++)
2537 | A[i] = cond ? expr : A[i];
2539 The original CFG looks like this:
2546 | if (i < N) goto bb_5 else goto bb_2
2550 | cond = some_computation;
2551 | if (cond) goto bb_3 else goto bb_4
2563 insert_gimplified_predicates inserts the computation of the COND
2564 expression at the beginning of the destination basic block:
2571 | if (i < N) goto bb_5 else goto bb_2
2575 | cond = some_computation;
2576 | if (cond) goto bb_3 else goto bb_4
2580 | cond = some_computation;
2589 predicate_statements is then predicating the memory write as follows:
2596 | if (i < N) goto bb_5 else goto bb_2
2600 | if (cond) goto bb_3 else goto bb_4
2604 | cond = some_computation;
2605 | A[i] = cond ? expr : A[i];
2613 and finally combine_blocks removes the basic block boundaries making
2614 the loop vectorizable:
2618 | if (i < N) goto bb_5 else goto bb_1
2622 | cond = some_computation;
2623 | A[i] = cond ? expr : A[i];
2624 | if (i < N) goto bb_5 else goto bb_4
/* NOTE(review): lossy extraction -- the return type, several declarations
   (mask, index, new_stmt) and many braces are not visible in this chunk.  */
2633 predicate_statements (loop_p loop
)
2635 unsigned int i
, orig_loop_num_nodes
= loop
->num_nodes
;
/* Cache of masks already built in this block, keyed by bit size.  */
2636 auto_vec
<int, 1> vect_sizes
;
2637 auto_vec
<tree
, 1> vect_masks
;
/* SSA names defined so far in the current block (for value_available_p).  */
2638 hash_set
<tree_ssa_name_hash
> ssa_names
;
2640 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2642 gimple_stmt_iterator gsi
;
2643 basic_block bb
= ifc_bbs
[i
];
2644 tree cond
= bb_predicate (bb
);
/* Unconditionally-executed blocks need no predication.  */
2648 if (is_true_predicate (cond
))
2652 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
2655 cond
= TREE_OPERAND (cond
, 0);
/* Masks are per-block; reset the cache for each BB.  */
2658 vect_sizes
.truncate (0);
2659 vect_masks
.truncate (0);
2661 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
2663 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
/* Dead under a false predicate: remove stores outright.  */
2667 else if (is_false_predicate (cond
)
2668 && gimple_vdef (stmt
))
2670 unlink_stmt_vdef (stmt
);
2671 gsi_remove (&gsi
, true);
2672 release_defs (stmt
);
/* GF_PLF_2 marks statements that need masking (set earlier in the pass).  */
2675 else if (gimple_plf (stmt
, GF_PLF_2
)
2676 && is_gimple_assign (stmt
))
2678 tree lhs
= gimple_assign_lhs (stmt
);
2681 gimple_seq stmts
= NULL
;
2682 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
2683 /* We checked before setting GF_PLF_2 that an equivalent
2684 integer mode exists. */
2685 int bitsize
= GET_MODE_BITSIZE (mode
).to_constant ();
2686 if (!vect_sizes
.is_empty ()
2687 && (index
= mask_exists (bitsize
, vect_sizes
)) != -1)
2688 /* Use created mask. */
2689 mask
= vect_masks
[index
];
/* Otherwise materialize the mask from the predicate expression.  */
2692 if (COMPARISON_CLASS_P (cond
))
2693 mask
= gimple_build (&stmts
, TREE_CODE (cond
),
2695 TREE_OPERAND (cond
, 0),
2696 TREE_OPERAND (cond
, 1));
/* Negated predicate: XOR with all-true to invert the mask.  */
2703 = constant_boolean_node (true, TREE_TYPE (mask
));
2704 mask
= gimple_build (&stmts
, BIT_XOR_EXPR
,
2705 TREE_TYPE (mask
), mask
, true_val
);
2707 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2709 /* Save mask and its size for further use. */
2710 vect_sizes
.safe_push (bitsize
);
2711 vect_masks
.safe_push (mask
);
/* Single load/store vs. trapping arithmetic take different helpers.  */
2713 if (gimple_assign_single_p (stmt
))
2714 new_stmt
= predicate_load_or_store (&gsi
, stmt
, mask
);
2716 new_stmt
= predicate_rhs_code (stmt
, mask
, cond
, &ssa_names
);
2718 gsi_replace (&gsi
, new_stmt
, true);
/* Signed-overflow-UB arithmetic, now executed unconditionally, must be
   rewritten into a form with defined overflow.  */
2720 else if (((lhs
= gimple_assign_lhs (stmt
)), true)
2721 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2722 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2723 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
))
2724 && arith_code_with_undefined_signed_overflow
2725 (gimple_assign_rhs_code (stmt
)))
2727 gsi_remove (&gsi
, true);
2728 gimple_seq stmts
= rewrite_to_defined_overflow (stmt
);
2730 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
2733 gassign
*stmt2
= as_a
<gassign
*> (gsi_stmt (gsi2
));
2734 gsi_remove (&gsi2
, false);
2737 gsi_insert_before (&gsi
, stmt2
, GSI_NEW_STMT
);
2741 gsi_insert_after (&gsi
, stmt2
, GSI_NEW_STMT
);
/* Remaining stores: turn "LHS = RHS" into "LHS = cond ? RHS : LHS".  */
2744 else if (gimple_vdef (stmt
))
2746 tree lhs
= gimple_assign_lhs (stmt
);
2747 tree rhs
= gimple_assign_rhs1 (stmt
);
2748 tree type
= TREE_TYPE (lhs
);
2750 lhs
= ifc_temp_var (type
, unshare_expr (lhs
), &gsi
);
2751 rhs
= ifc_temp_var (type
, unshare_expr (rhs
), &gsi
);
2753 std::swap (lhs
, rhs
);
2754 cond
= force_gimple_operand_gsi (&gsi
, unshare_expr (cond
), true,
2755 NULL_TREE
, true, GSI_SAME_STMT
);
2756 rhs
= fold_build_cond_expr (type
, unshare_expr (cond
), rhs
, lhs
);
2757 gimple_assign_set_rhs1 (stmt
, ifc_temp_var (type
, rhs
, &gsi
));
2761 if (gimple_plf (gsi_stmt (gsi
), GF_PLF_2
)
2762 && is_gimple_call (gsi_stmt (gsi
)))
2764 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2765 This will cause the vectorizer to match the "in branch"
2766 clone variants, and serves to build the mask vector
2767 in a natural way. */
2768 gcall
*call
= dyn_cast
<gcall
*> (gsi_stmt (gsi
));
2769 tree orig_fn
= gimple_call_fn (call
);
2770 int orig_nargs
= gimple_call_num_args (call
);
/* New argument list: function, original args, then the condition.  */
2771 auto_vec
<tree
> args
;
2772 args
.safe_push (orig_fn
);
2773 for (int i
= 0; i
< orig_nargs
; i
++)
2774 args
.safe_push (gimple_call_arg (call
, i
));
2775 args
.safe_push (cond
);
2777 /* Replace the call with a IFN_MASK_CALL that has the extra
2778 condition parameter. */
2779 gcall
*new_call
= gimple_build_call_internal_vec (IFN_MASK_CALL
,
2781 gimple_call_set_lhs (new_call
, gimple_call_lhs (call
));
2782 gsi_replace (&gsi
, new_call
, true);
/* Track defs seen so far in this block for value_available_p.  */
2785 lhs
= gimple_get_lhs (gsi_stmt (gsi
));
2786 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
2787 ssa_names
.add (lhs
);
2794 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2795 other than the exit and latch of the LOOP. Also resets the
2796 GIMPLE_DEBUG information. */
/* NOTE(review): lossy extraction -- the return type, the declaration of I,
   the switch case labels and the gsi_next calls are not visible here.  */
2799 remove_conditions_and_labels (loop_p loop
)
2801 gimple_stmt_iterator gsi
;
2804 for (i
= 0; i
< loop
->num_nodes
; i
++)
2806 basic_block bb
= ifc_bbs
[i
];
/* Keep the conditions of exit blocks and the latch intact.  */
2808 if (bb_with_exit_edge_p (loop
, bb
)
2809 || bb
== loop
->latch
)
2812 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); )
2813 switch (gimple_code (gsi_stmt (gsi
)))
2817 gsi_remove (&gsi
, true);
2821 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2822 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
2824 gimple_debug_bind_reset_value (gsi_stmt (gsi
));
2825 update_stmt (gsi_stmt (gsi
));
2836 /* Combine all the basic blocks from LOOP into one or two super basic
2837 blocks. Replace PHI nodes with conditional modify expressions. */
/* NOTE(review): lossy extraction -- the return type, several declarations
   (i, e, ei, use_stmt, op), braces and some control flow are not visible
   in this chunk; comments below describe only what is visible.  */
2840 combine_blocks (class loop
*loop
)
2842 basic_block bb
, exit_bb
, merge_target_bb
;
2843 unsigned int orig_loop_num_nodes
= loop
->num_nodes
;
/* Phase 1: lower control flow to predicates and predicated statements.  */
2848 remove_conditions_and_labels (loop
);
2849 insert_gimplified_predicates (loop
);
2850 predicate_all_scalar_phis (loop
);
2852 if (need_to_predicate
|| need_to_rewrite_undefined
)
2853 predicate_statements (loop
);
2855 /* Merge basic blocks. */
2857 bool *predicated
= XNEWVEC (bool, orig_loop_num_nodes
);
2858 for (i
= 0; i
< orig_loop_num_nodes
; i
++)
2861 predicated
[i
] = !is_true_predicate (bb_predicate (bb
));
2862 free_bb_predicate (bb
);
/* Remember the (unique) block carrying the loop exit edge.  */
2863 if (bb_with_exit_edge_p (loop
, bb
))
2865 gcc_assert (exit_bb
== NULL
);
2869 gcc_assert (exit_bb
!= loop
->latch
);
2871 merge_target_bb
= loop
->header
;
2873 /* Get at the virtual def valid for uses starting at the first block
2874 we merge into the header. Without a virtual PHI the loop has the
2875 same virtual use on all stmts. */
2876 gphi
*vphi
= get_virtual_phi (loop
->header
);
2877 tree last_vdef
= NULL_TREE
;
2880 last_vdef
= gimple_phi_result (vphi
);
2881 for (gimple_stmt_iterator gsi
= gsi_start_bb (loop
->header
);
2882 ! gsi_end_p (gsi
); gsi_next (&gsi
))
2883 if (gimple_vdef (gsi_stmt (gsi
)))
2884 last_vdef
= gimple_vdef (gsi_stmt (gsi
));
/* Move every non-exit, non-latch block's statements into the header.  */
2886 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2888 gimple_stmt_iterator gsi
;
2889 gimple_stmt_iterator last
;
2893 if (bb
== exit_bb
|| bb
== loop
->latch
)
2896 /* We release virtual PHIs late because we have to propagate them
2897 out using the current VUSE. The def might be the one used
2899 vphi
= get_virtual_phi (bb
);
2902 /* When there's just loads inside the loop a stray virtual
2903 PHI merging the uses can appear, update last_vdef from
2906 last_vdef
= gimple_phi_arg_def (vphi
, 0);
2907 imm_use_iterator iter
;
2908 use_operand_p use_p
;
2910 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, gimple_phi_result (vphi
))
2912 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2913 SET_USE (use_p
, last_vdef
);
2915 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi
)))
2916 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef
) = 1;
2917 gsi
= gsi_for_stmt (vphi
);
2918 remove_phi_node (&gsi
, true);
2921 /* Make stmts member of loop->header and clear range info from all stmts
2922 in BB which is now no longer executed conditional on a predicate we
2923 could have derived it from. */
2924 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2926 gimple
*stmt
= gsi_stmt (gsi
);
2927 gimple_set_bb (stmt
, merge_target_bb
);
2928 /* Update virtual operands. */
2931 use_operand_p use_p
= ssa_vuse_operand (stmt
);
2933 && USE_FROM_PTR (use_p
) != last_vdef
)
2934 SET_USE (use_p
, last_vdef
);
2935 if (gimple_vdef (stmt
))
2936 last_vdef
= gimple_vdef (stmt
);
2939 /* If this is the first load we arrive at update last_vdef
2940 so we handle stray PHIs correctly. */
2941 last_vdef
= gimple_vuse (stmt
);
/* Range info was only valid under the block's predicate.  */
2946 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
)
2947 reset_flow_sensitive_info (op
);
2951 /* Update stmt list. */
2952 last
= gsi_last_bb (merge_target_bb
);
2953 gsi_insert_seq_after_without_update (&last
, bb_seq (bb
), GSI_NEW_STMT
);
2954 set_bb_seq (bb
, NULL
);
2957 /* Fixup virtual operands in the exit block. */
2959 && exit_bb
!= loop
->header
)
2961 /* We release virtual PHIs late because we have to propagate them
2962 out using the current VUSE. The def might be the one used
2964 vphi
= get_virtual_phi (exit_bb
);
2967 /* When there's just loads inside the loop a stray virtual
2968 PHI merging the uses can appear, update last_vdef from
2971 last_vdef
= gimple_phi_arg_def (vphi
, 0);
2972 imm_use_iterator iter
;
2973 use_operand_p use_p
;
2975 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, gimple_phi_result (vphi
))
2977 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2978 SET_USE (use_p
, last_vdef
);
2980 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi
)))
2981 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef
) = 1;
2982 gimple_stmt_iterator gsi
= gsi_for_stmt (vphi
);
2983 remove_phi_node (&gsi
, true);
2987 /* Now remove all the edges in the loop, except for those from the exit
2988 block and delete the blocks we elided. */
2989 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2993 for (ei
= ei_start (bb
->preds
); (e
= ei_safe_edge (ei
));)
2995 if (e
->src
== exit_bb
)
3001 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
3005 if (bb
== exit_bb
|| bb
== loop
->latch
)
3008 delete_basic_block (bb
);
3011 /* Re-connect the exit block. */
3012 if (exit_bb
!= NULL
)
3014 if (exit_bb
!= loop
->header
)
3016 /* Connect this node to loop header. */
3017 make_single_succ_edge (loop
->header
, exit_bb
, EDGE_FALLTHRU
);
3018 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, loop
->header
);
3021 /* Redirect non-exit edges to loop->latch. */
3022 FOR_EACH_EDGE (e
, ei
, exit_bb
->succs
)
3024 if (!loop_exit_edge_p (loop
, e
))
3025 redirect_edge_and_branch (e
, loop
->latch
);
3027 set_immediate_dominator (CDI_DOMINATORS
, loop
->latch
, exit_bb
);
3031 /* If the loop does not have an exit, reconnect header and latch. */
3032 make_edge (loop
->header
, loop
->latch
, EDGE_FALLTHRU
);
3033 set_immediate_dominator (CDI_DOMINATORS
, loop
->latch
, loop
->header
);
3036 /* If possible, merge loop header to the block with the exit edge.
3037 This reduces the number of basic blocks to two, to please the
3038 vectorizer that handles only loops with two nodes. */
3040 && exit_bb
!= loop
->header
)
3042 if (can_merge_blocks_p (loop
->header
, exit_bb
))
3043 merge_blocks (loop
->header
, exit_bb
);
3051 /* Version LOOP before if-converting it; the original loop
3052 will be if-converted, the new copy of the loop will not,
3053 and the LOOP_VECTORIZED internal call will be guarding which
3054 loop to execute. The vectorizer pass will fold this
3055 internal call into either true or false.
3057 Note that this function intentionally invalidates profile. Both edges
3058 out of LOOP_VECTORIZED must have 100% probability so the profile remains
3059 consistent after the condition is folded in the vectorizer. */
/* NOTE(review): lossy extraction -- the return type, the declaration of G
   and the return statements are not visible in this chunk.  */
3062 version_loop_for_if_conversion (class loop
*loop
, vec
<gimple
*> *preds
)
3064 basic_block cond_bb
;
3065 tree cond
= make_ssa_name (boolean_type_node
);
3066 class loop
*new_loop
;
3068 gimple_stmt_iterator gsi
;
3069 unsigned int save_length
= 0;
/* Guard: cond = IFN_LOOP_VECTORIZED (loop->num, <copy num set below>).  */
3071 g
= gimple_build_call_internal (IFN_LOOP_VECTORIZED
, 2,
3072 build_int_cst (integer_type_node
, loop
->num
),
3074 gimple_call_set_lhs (g
, cond
);
3076 void **saved_preds
= NULL
;
3077 if (any_complicated_phi
|| need_to_predicate
)
3079 /* Save BB->aux around loop_version as that uses the same field. */
3080 save_length
= loop
->inner
? loop
->inner
->num_nodes
: loop
->num_nodes
;
3081 saved_preds
= XALLOCAVEC (void *, save_length
);
3082 for (unsigned i
= 0; i
< save_length
; i
++)
3083 saved_preds
[i
] = ifc_bbs
[i
]->aux
;
3086 initialize_original_copy_tables ();
3087 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
3088 is re-merged in the vectorizer. */
3089 new_loop
= loop_version (loop
, cond
, &cond_bb
,
3090 profile_probability::always (),
3091 profile_probability::always (),
3092 profile_probability::always (),
3093 profile_probability::always (), true);
3094 free_original_copy_tables ();
/* Restore the predicates stashed in BB->aux.  */
3096 if (any_complicated_phi
|| need_to_predicate
)
3097 for (unsigned i
= 0; i
< save_length
; i
++)
3098 ifc_bbs
[i
]->aux
= saved_preds
[i
];
3100 if (new_loop
== NULL
)
/* Only the original loop will be if-converted/vectorized.  */
3103 new_loop
->dont_vectorize
= true;
3104 new_loop
->force_vectorize
= false;
3105 gsi
= gsi_last_bb (cond_bb
);
3106 gimple_call_set_arg (g
, 1, build_int_cst (integer_type_node
, new_loop
->num
));
3108 preds
->safe_push (g
);
3109 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3110 update_ssa (TODO_update_ssa_no_phi
);
3114 /* Return true when LOOP satisfies the following conditions that will
3115 allow it to be recognized by the vectorizer for outer-loop
3117 - The loop is not the root node of the loop tree.
3118 - The loop has exactly one inner loop.
3119 - The loop has a single exit.
3120 - The loop header has a single successor, which is the inner
3122 - Each of the inner and outer loop latches have a single
3124 - The loop exit block has a single predecessor, which is the
3125 inner loop's exit block. */
/* NOTE(review): lossy extraction -- the return type and the return
   statements are not visible in this chunk.  */
3128 versionable_outer_loop_p (class loop
*loop
)
/* Structural checks, cheapest first; any failure disqualifies LOOP.  */
3130 if (!loop_outer (loop
)
3131 || loop
->dont_vectorize
3133 || loop
->inner
->next
3134 || !single_exit (loop
)
3135 || !single_succ_p (loop
->header
)
3136 || single_succ (loop
->header
) != loop
->inner
->header
3137 || !single_pred_p (loop
->latch
)
3138 || !single_pred_p (loop
->inner
->latch
))
/* The outer exit must be fed directly by the inner loop's exit block.  */
3141 basic_block outer_exit
= single_pred (loop
->latch
);
3142 basic_block inner_exit
= single_pred (loop
->inner
->latch
);
3144 if (!single_pred_p (outer_exit
) || single_pred (outer_exit
) != inner_exit
)
3148 fprintf (dump_file
, "Found vectorizable outer loop for versioning\n");
3153 /* Performs splitting of critical edges. Skip splitting and return false
3154 if LOOP will not be converted because:
3156 - LOOP is not well formed.
3157 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3159 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
/* NOTE(review): lossy extraction -- the return type, declarations of
   BODY/I/E/EI, several returns and the split_edge call itself are not
   visible in this chunk.  */
3162 ifcvt_split_critical_edges (class loop
*loop
, bool aggressive_if_conv
)
3166 unsigned int num
= loop
->num_nodes
;
3170 auto_vec
<edge
> critical_edges
;
3172 /* Loop is not well formed. */
3176 body
= get_loop_body (loop
);
3177 for (i
= 0; i
< num
; i
++)
/* Without aggressive if-conversion, refuse blocks whose PHIs would get
   more than MAX_PHI_ARG_NUM arguments.  */
3180 if (!aggressive_if_conv
3182 && EDGE_COUNT (bb
->preds
) > MAX_PHI_ARG_NUM
)
3184 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3186 "BB %d has complicated PHI with more than %u args.\n",
3187 bb
->index
, MAX_PHI_ARG_NUM
);
3192 if (bb
== loop
->latch
|| bb_with_exit_edge_p (loop
, bb
))
3195 /* Skip basic blocks not ending with conditional branch. */
3196 if (!safe_is_a
<gcond
*> (*gsi_last_bb (bb
)))
/* Collect critical successor edges staying inside LOOP.  */
3199 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3200 if (EDGE_CRITICAL_P (e
) && e
->dest
->loop_father
== loop
)
3201 critical_edges
.safe_push (e
);
3205 while (critical_edges
.length () > 0)
3207 e
= critical_edges
.pop ();
3208 /* Don't split if bb can be predicated along non-critical edge. */
3209 if (EDGE_COUNT (e
->dest
->preds
) > 2 || all_preds_critical_p (e
->dest
))
3216 /* Delete redundant statements produced by predication which prevents
3217 loop vectorization. */
/* NOTE(review): lossy extraction -- the return type, declarations of
   PHI/STMT/STMT1/USE/ITER/WRITE and some braces are not visible here.
   Visible structure: mark roots live -> propagate liveness backwards ->
   delete everything still unmarked.  */
3220 ifcvt_local_dce (class loop
*loop
)
3225 gimple_stmt_iterator gsi
;
3226 auto_vec
<gimple
*> worklist
;
3227 enum gimple_code code
;
3228 use_operand_p use_p
;
3229 imm_use_iterator imm_iter
;
3231 /* The loop has a single BB only. */
3232 basic_block bb
= loop
->header
;
3233 tree latch_vdef
= NULL_TREE
;
3235 worklist
.create (64);
3236 /* Consider all phi as live statements. */
3237 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3239 phi
= gsi_stmt (gsi
);
3240 gimple_set_plf (phi
, GF_PLF_2
, true);
3241 worklist
.safe_push (phi
);
/* Remember the VDEF reaching the latch for dead-store classification.  */
3242 if (virtual_operand_p (gimple_phi_result (phi
)))
3243 latch_vdef
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
3245 /* Consider load/store statements, CALL and COND as live. */
3246 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3248 stmt
= gsi_stmt (gsi
);
3249 if (is_gimple_debug (stmt
))
3251 gimple_set_plf (stmt
, GF_PLF_2
, true);
3254 if (gimple_store_p (stmt
) || gimple_assign_load_p (stmt
))
3256 gimple_set_plf (stmt
, GF_PLF_2
, true);
3257 worklist
.safe_push (stmt
);
3260 code
= gimple_code (stmt
);
3261 if (code
== GIMPLE_COND
|| code
== GIMPLE_CALL
)
3263 gimple_set_plf (stmt
, GF_PLF_2
, true);
3264 worklist
.safe_push (stmt
);
/* Everything else starts out dead...  */
3267 gimple_set_plf (stmt
, GF_PLF_2
, false);
/* ...unless its result is used outside this block.  */
3269 if (code
== GIMPLE_ASSIGN
)
3271 tree lhs
= gimple_assign_lhs (stmt
);
3272 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
3274 stmt1
= USE_STMT (use_p
);
3275 if (!is_gimple_debug (stmt1
) && gimple_bb (stmt1
) != bb
)
3277 gimple_set_plf (stmt
, GF_PLF_2
, true);
3278 worklist
.safe_push (stmt
);
3284 /* Propagate liveness through arguments of live stmt. */
3285 while (worklist
.length () > 0)
3288 use_operand_p use_p
;
3291 stmt
= worklist
.pop ();
3292 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
3294 use
= USE_FROM_PTR (use_p
);
3295 if (TREE_CODE (use
) != SSA_NAME
)
3297 stmt1
= SSA_NAME_DEF_STMT (use
);
3298 if (gimple_bb (stmt1
) != bb
|| gimple_plf (stmt1
, GF_PLF_2
))
3300 gimple_set_plf (stmt1
, GF_PLF_2
, true);
3301 worklist
.safe_push (stmt1
);
3304 /* Delete dead statements. */
3305 gsi
= gsi_last_bb (bb
);
3306 while (!gsi_end_p (gsi
))
/* Iterate backwards; remember the predecessor before possibly removing.  */
3308 gimple_stmt_iterator gsiprev
= gsi
;
3309 gsi_prev (&gsiprev
);
3310 stmt
= gsi_stmt (gsi
);
/* Stores get an extra DSE-style check even if marked live.  */
3311 if (gimple_store_p (stmt
) && gimple_vdef (stmt
))
3313 tree lhs
= gimple_get_lhs (stmt
);
3315 ao_ref_init (&write
, lhs
);
3317 if (dse_classify_store (&write
, stmt
, false, NULL
, NULL
, latch_vdef
)
3319 delete_dead_or_redundant_assignment (&gsi
, "dead");
3324 if (gimple_plf (stmt
, GF_PLF_2
))
3329 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3331 fprintf (dump_file
, "Delete dead stmt in bb#%d\n", bb
->index
);
3332 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3334 gsi_remove (&gsi
, true);
3335 release_defs (stmt
);
3340 /* Return true if VALUE is already available on edge PE. */
/* NOTE(review): lossy extraction -- the return type and the return
   statements are not visible in this chunk.  */
3343 ifcvt_available_on_edge_p (edge pe
, tree value
)
/* Constants are available anywhere.  */
3345 if (is_gimple_min_invariant (value
))
3348 if (TREE_CODE (value
) == SSA_NAME
)
/* An SSA name is available if its definition dominates PE's destination
   (a NULL def block means a default definition, available everywhere).  */
3350 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (value
));
3351 if (!def_bb
|| dominated_by_p (CDI_DOMINATORS
, pe
->dest
, def_bb
))
3358 /* Return true if STMT can be hoisted from if-converted loop LOOP to
/* NOTE(review): lossy extraction -- the return type, the rest of this
   header comment and the return statements are not visible here.  */
3362 ifcvt_can_hoist (class loop
*loop
, edge pe
, gimple
*stmt
)
/* Masked internal calls must stay inside the loop.  */
3364 if (auto *call
= dyn_cast
<gcall
*> (stmt
))
3366 if (gimple_call_internal_p (call
)
3367 && internal_fn_mask_index (gimple_call_internal_fn (call
)) >= 0)
/* COND_EXPRs produced by predication must stay as well.  */
3370 else if (auto *assign
= dyn_cast
<gassign
*> (stmt
))
3372 if (gimple_assign_rhs_code (assign
) == COND_EXPR
)
/* Only pure, non-trapping, non-throwing, memory-free statements can move.  */
3378 if (gimple_has_side_effects (stmt
)
3379 || gimple_could_trap_p (stmt
)
3380 || stmt_could_throw_p (cfun
, stmt
)
3381 || gimple_vdef (stmt
)
3382 || gimple_vuse (stmt
))
3385 int num_args
= gimple_num_args (stmt
);
/* Hoisting to a non-preheader edge: all operands must already be
   available there; otherwise loop invariance of all operands suffices.  */
3386 if (pe
!= loop_preheader_edge (loop
))
3388 for (int i
= 0; i
< num_args
; ++i
)
3389 if (!ifcvt_available_on_edge_p (pe
, gimple_arg (stmt
, i
)))
3394 for (int i
= 0; i
< num_args
; ++i
)
3395 if (!expr_invariant_in_loop_p (loop
, gimple_arg (stmt
, i
)))
3402 /* Hoist invariant statements from LOOP to edge PE. */
/* NOTE(review): lossy extraction -- the return type, the gsi_next for the
   non-hoisted path, braces and the free of BODY are not visible here.  */
3405 ifcvt_hoist_invariants (class loop
*loop
, edge pe
)
3407 gimple_stmt_iterator hoist_gsi
= {};
3408 unsigned int num_blocks
= loop
->num_nodes
;
3409 basic_block
*body
= get_loop_body (loop
);
3410 for (unsigned int i
= 0; i
< num_blocks
; ++i
)
3411 for (gimple_stmt_iterator gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);)
3413 gimple
*stmt
= gsi_stmt (gsi
);
3414 if (ifcvt_can_hoist (loop
, pe
, stmt
))
3416 /* Once we've hoisted one statement, insert other statements
/* First hoisted statement goes on the edge itself; later ones are
   chained after it via HOIST_GSI.  */
3418 gsi_remove (&gsi
, false);
3420 gsi_insert_after (&hoist_gsi
, stmt
, GSI_NEW_STMT
);
3423 gsi_insert_on_edge_immediate (pe
, stmt
);
3424 hoist_gsi
= gsi_for_stmt (stmt
);
3433 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3434 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3435 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3436 if not NULL, will hold the tree representing the base struct of this
/* NOTE(review): lossy extraction -- the return type, the STRUCT_EXPR
   parameter line, the NULL returns and the final return of REP_DECL are
   not visible in this chunk.  */
3440 get_bitfield_rep (gassign
*stmt
, bool write
, tree
*bitpos
,
/* For a write the bitfield COMPONENT_REF is the LHS, for a read the RHS.  */
3443 tree comp_ref
= write
? gimple_assign_lhs (stmt
)
3444 : gimple_assign_rhs1 (stmt
);
3446 tree field_decl
= TREE_OPERAND (comp_ref
, 1);
3447 tree rep_decl
= DECL_BIT_FIELD_REPRESENTATIVE (field_decl
);
3449 /* Bail out if the representative is not a suitable type for a scalar
3450 register variable. */
3451 if (!is_gimple_reg_type (TREE_TYPE (rep_decl
)))
3454 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3456 unsigned HOST_WIDE_INT bf_prec
3457 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt
)));
3458 if (compare_tree_int (DECL_SIZE (field_decl
), bf_prec
) != 0)
3462 *struct_expr
= TREE_OPERAND (comp_ref
, 0);
3466 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3467 where our bitfield starts in relation to the container REP_DECL. The
3468 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3469 us how many bytes from the start of the structure there are until the
3470 start of the group of bitfield members the FIELD_DECL belongs to,
3471 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3472 position our actual bitfield member starts. For the container
3473 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3474 us the distance between the start of the structure and the start of
3475 the container, though the first is in bytes and the later other in
3476 bits. With this in mind we calculate the bit position of our new
3477 BITFIELD_REF by subtracting the number of bits between the start of
3478 the structure and the container from the number of bits from the start
3479 of the structure and the actual bitfield member. */
/* bf_pos = FIELD_DECL's byte offset in bits + its bit offset.  */
3480 tree bf_pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
3481 DECL_FIELD_OFFSET (field_decl
),
3482 build_int_cst (bitsizetype
, BITS_PER_UNIT
));
3483 bf_pos
= fold_build2 (PLUS_EXPR
, bitsizetype
, bf_pos
,
3484 DECL_FIELD_BIT_OFFSET (field_decl
));
/* rep_pos = REP_DECL's byte offset in bits + its bit offset.  */
3485 tree rep_pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
3486 DECL_FIELD_OFFSET (rep_decl
),
3487 build_int_cst (bitsizetype
, BITS_PER_UNIT
));
3488 rep_pos
= fold_build2 (PLUS_EXPR
, bitsizetype
, rep_pos
,
3489 DECL_FIELD_BIT_OFFSET (rep_decl
));
3491 *bitpos
= fold_build2 (MINUS_EXPR
, bitsizetype
, bf_pos
, rep_pos
);
3498 /* Lowers the bitfield described by DATA.
3505 __ifc_1 = struct.<representative>;
3506 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3507 struct.<representative> = __ifc_2;
3515 __ifc_1 = struct.<representative>;
3516 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3518 where representative is a legal load that contains the bitfield value,
3519 bitsize is the size of the bitfield and bitpos the offset to the start of
3520 the bitfield within the representative. */
/* NOTE(review): lossy extraction -- the return type, the declarations of
   BITPOS and STRUCT_EXPR, the write/read branch structure and several
   argument lines are not visible in this chunk.  */
3523 lower_bitfield (gassign
*stmt
, bool write
)
3527 tree rep_decl
= get_bitfield_rep (stmt
, write
, &bitpos
, &struct_expr
);
3528 tree rep_type
= TREE_TYPE (rep_decl
);
3529 tree bf_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
3531 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3532 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3534 fprintf (dump_file
, "Lowering:\n");
3535 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3536 fprintf (dump_file
, "to:\n");
3539 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is its
3540 defining SSA_NAME. */
3541 tree rep_comp_ref
= build3 (COMPONENT_REF
, rep_type
, struct_expr
, rep_decl
,
3543 tree new_val
= ifc_temp_var (rep_type
, rep_comp_ref
, &gsi
);
3545 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3546 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (new_val
), 0, TDF_SLIM
);
/* Write path: insert the bitfield value into the loaded representative,
   then store the whole representative back.  */
3550 new_val
= ifc_temp_var (rep_type
,
3551 build3 (BIT_INSERT_EXPR
, rep_type
, new_val
,
3552 unshare_expr (gimple_assign_rhs1 (stmt
)),
3555 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3556 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (new_val
), 0, TDF_SLIM
);
3558 gimple
*new_stmt
= gimple_build_assign (unshare_expr (rep_comp_ref
),
3560 gimple_move_vops (new_stmt
, stmt
);
3561 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
3563 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3564 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
/* Read path: extract the bitfield from the loaded representative.  */
3568 tree bfr
= build3 (BIT_FIELD_REF
, bf_type
, new_val
,
3569 build_int_cst (bitsizetype
, TYPE_PRECISION (bf_type
)),
3571 new_val
= ifc_temp_var (bf_type
, bfr
, &gsi
);
3573 gimple
*new_stmt
= gimple_build_assign (gimple_assign_lhs (stmt
),
3575 gimple_move_vops (new_stmt
, stmt
);
3576 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
3578 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3579 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
/* The original bitfield access is now fully replaced.  */
3582 gsi_remove (&gsi
, true);
3585 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3586 with data structures representing these bitfields. */
/* NOTE(review): lossy extraction -- the return type, some guard branches
   and `continue` statements are not visible in this chunk.  */
3589 bitfields_to_lower_p (class loop
*loop
,
3590 vec
<gassign
*> &reads_to_lower
,
3591 vec
<gassign
*> &writes_to_lower
)
3593 gimple_stmt_iterator gsi
;
3595 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3597 fprintf (dump_file
, "Analyzing loop %d for bitfields:\n", loop
->num
);
3600 for (unsigned i
= 0; i
< loop
->num_nodes
; ++i
)
3602 basic_block bb
= ifc_bbs
[i
];
3603 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3605 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
/* A COMPONENT_REF on the LHS means a bitfield write, on the RHS a read.  */
3609 tree op
= gimple_assign_lhs (stmt
);
3610 bool write
= TREE_CODE (op
) == COMPONENT_REF
;
3613 op
= gimple_assign_rhs1 (stmt
);
3615 if (TREE_CODE (op
) != COMPONENT_REF
)
/* Only accesses to actual bitfield members are of interest.  */
3618 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op
, 1)))
3620 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3621 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3623 if (!INTEGRAL_TYPE_P (TREE_TYPE (op
)))
3625 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3626 fprintf (dump_file
, "\t Bitfield NO OK to lower,"
3627 " field type is not Integral.\n");
/* get_bitfield_rep also rejects BLKmode representatives.  */
3631 if (!get_bitfield_rep (stmt
, write
, NULL
, NULL
))
3633 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3634 fprintf (dump_file
, "\t Bitfield NOT OK to lower,"
3635 " representative is BLKmode.\n");
3639 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3640 fprintf (dump_file
, "\tBitfield OK to lower.\n");
3642 writes_to_lower
.safe_push (stmt
);
3644 reads_to_lower
.safe_push (stmt
);
3648 return !reads_to_lower
.is_empty () || !writes_to_lower
.is_empty ();
3652 /* If-convert LOOP when it is legal.  For the moment this pass has no
3653 profitability analysis.  Returns non-zero todo flags when something
/* Main driver: decides legality, optionally versions the loop,
   lowers bit-field accesses, and flattens the body into a single
   predicated basic block.  PREDS collects the .LOOP_VECTORIZED
   internal calls guarding versioned copies so the caller can fold
   them later.  NOTE(review): several statements (gotos to the cleanup
   label, braces, some declarations) are elided in this extraction.  */
3657 tree_if_conversion (class loop
*loop
, vec
<gimple
*> *preds
)
3659 unsigned int todo
= 0;
3660 bool aggressive_if_conv
;
/* Worklists of bit-field reads/writes found by bitfields_to_lower_p.  */
3662 auto_vec
<gassign
*, 4> reads_to_lower
;
3663 auto_vec
<gassign
*, 4> writes_to_lower
;
3666 auto_vec
<data_reference_p
, 10> refs
;
/* Reset the pass-global state for this loop.  */
3671 need_to_lower_bitfields
= false;
3672 need_to_ifcvt
= false;
3673 need_to_predicate
= false;
3674 need_to_rewrite_undefined
= false;
3675 any_complicated_phi
= false;
3677 /* Apply more aggressive if-conversion when loop or its outer loop were
3678 marked with simd pragma.  When that's the case, we try to if-convert
3679 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
3680 aggressive_if_conv
= loop
->force_vectorize
;
3681 if (!aggressive_if_conv
)
3683 class loop
*outer_loop
= loop_outer (loop
);
3684 if (outer_loop
&& outer_loop
->force_vectorize
)
3685 aggressive_if_conv
= true;
/* Multiple-exit loops are not handled; bail out (elided branch).  */
3688 if (!single_exit (loop
))
3691 /* If there are more than two BBs in the loop then there is at least one if
3693 if (loop
->num_nodes
> 2
3694 && !ifcvt_split_critical_edges (loop
, aggressive_if_conv
))
3697 ifc_bbs
= get_loop_body_in_if_conv_order (loop
);
/* get_loop_body_in_if_conv_order returns NULL for irreducible loops;
   the NULL check itself is elided here.  */
3700 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3701 fprintf (dump_file
, "Irreducible loop\n");
3705 if (find_data_references_in_loop (loop
, &refs
) == chrec_dont_know
)
/* More than header+latch means there is control flow to convert.  */
3708 if (loop
->num_nodes
> 2)
3710 need_to_ifcvt
= true;
3712 if (!if_convertible_loop_p (loop
, &refs
) || !dbg_cnt (if_conversion_tree
))
/* Predication / complicated PHIs require a versioned copy, which is
   only worthwhile when vectorization of this loop is on the table.  */
3715 if ((need_to_predicate
|| any_complicated_phi
)
3716 && ((!flag_tree_loop_vectorize
&& !loop
->force_vectorize
)
3717 || loop
->dont_vectorize
))
/* Bit-field lowering is likewise only attempted for loops that may
   be vectorized.  */
3721 if ((flag_tree_loop_vectorize
|| loop
->force_vectorize
)
3722 && !loop
->dont_vectorize
)
3723 need_to_lower_bitfields
= bitfields_to_lower_p (loop
, reads_to_lower
,
3726 if (!need_to_ifcvt
&& !need_to_lower_bitfields
)
3729 /* The edge to insert invariant stmts on.  */
3730 pe
= loop_preheader_edge (loop
);
3732 /* Since we have no cost model, always version loops unless the user
3733 specified -ftree-loop-if-convert or unless versioning is required.
3734 Either version this loop, or if the pattern is right for outer-loop
3735 vectorization, version the outer loop.  In the latter case we will
3736 still if-convert the original inner loop.  */
3737 if (need_to_lower_bitfields
3738 || need_to_predicate
3739 || any_complicated_phi
3740 || flag_tree_loop_if_convert
!= 1)
3743 = (versionable_outer_loop_p (loop_outer (loop
))
3744 ? loop_outer (loop
) : loop
);
3745 class loop
*nloop
= version_loop_for_if_conversion (vloop
, preds
);
3750 /* If versionable_outer_loop_p decided to version the
3751 outer loop, version also the inner loop of the non-vectorized
3752 loop copy.  So we transform:
3756 if (LOOP_VECTORIZED (1, 3))
3762 loop3 (copy of loop1)
3763 if (LOOP_VECTORIZED (4, 5))
3764 loop4 (copy of loop2)
3766 loop5 (copy of loop4)  */
3767 gcc_assert (nloop
->inner
&& nloop
->inner
->next
== NULL
);
3768 rloop
= nloop
->inner
;
3771 /* If we versioned loop then make sure to insert invariant
3772 stmts before the .LOOP_VECTORIZED check since the vectorizer
3773 will re-use that for things like runtime alias versioning
3774 whose condition can end up using those invariants.  */
3775 pe
= single_pred_edge (gimple_bb (preds
->last ()));
/* Phase 1: rewrite the collected bit-field reads/writes in terms of
   their integral representatives.  */
3778 if (need_to_lower_bitfields
)
3780 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3782 fprintf (dump_file
, "-------------------------\n");
3783 fprintf (dump_file
, "Start lowering bitfields\n");
3785 while (!reads_to_lower
.is_empty ())
3786 lower_bitfield (reads_to_lower
.pop (), false);
3787 while (!writes_to_lower
.is_empty ())
3788 lower_bitfield (writes_to_lower
.pop (), true);
3790 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3792 fprintf (dump_file
, "Done lowering bitfields\n");
3793 fprintf (dump_file
, "-------------------------\n");
3798 /* Now all statements are if-convertible.  Combine all the basic
3799 blocks into one huge basic block doing the if-conversion
3801 combine_blocks (loop
);
3804 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3805 and stores are involved.  CSE only the loop body, not the entry
3806 PHIs, those are to be kept in sync with the non-if-converted copy.
3807 ??? We'll still keep dead stores though.  */
3808 exit_bbs
= BITMAP_ALLOC (NULL
);
3809 bitmap_set_bit (exit_bbs
, single_exit (loop
)->dest
->index
);
3810 bitmap_set_bit (exit_bbs
, loop
->latch
->index
);
/* Replace SSA names made redundant by if-conversion before running VN.  */
3812 std::pair
<tree
, tree
> *name_pair
;
3813 unsigned ssa_names_idx
;
3814 FOR_EACH_VEC_ELT (redundant_ssa_names
, ssa_names_idx
, name_pair
)
3815 replace_uses_by (name_pair
->first
, name_pair
->second
);
3816 redundant_ssa_names
.release ();
/* Value-number the converted body (preheader edge in, exit bbs out).  */
3818 todo
|= do_rpo_vn (cfun
, loop_preheader_edge (loop
), exit_bbs
);
3820 /* Delete dead predicate computations.  */
3821 ifcvt_local_dce (loop
);
3822 BITMAP_FREE (exit_bbs
);
/* Hoist loop-invariant statements onto edge PE (possibly before the
   .LOOP_VECTORIZED check, see above).  */
3824 ifcvt_hoist_invariants (loop
, pe
);
3826 todo
|= TODO_cleanup_cfg
;
/* Cleanup (label elided): free data references, per-BB predicates,
   and reset the worklists — also used when retrying via rloop.  */
3829 data_reference_p dr
;
3831 for (i
= 0; refs
.iterate (i
, &dr
); i
++)
3842 for (i
= 0; i
< loop
->num_nodes
; i
++)
3843 free_bb_predicate (ifc_bbs
[i
]);
3851 reads_to_lower
.truncate (0);
3852 writes_to_lower
.truncate (0);
3859 /* Tree if-conversion pass management.  */
/* Static metadata registering this pass with the pass manager: a
   GIMPLE pass timed under TV_TREE_LOOP_IFCVT that requires CFG and
   SSA form and provides/destroys nothing.  (The surrounding braces
   and the pass-name field are elided in this extraction.)  */
3863 const pass_data pass_data_if_conversion
=
3865 GIMPLE_PASS
, /* type */
3867 OPTGROUP_NONE
, /* optinfo_flags */
3868 TV_TREE_LOOP_IFCVT
, /* tv_id */
3869 ( PROP_cfg
| PROP_ssa
), /* properties_required */
3870 0, /* properties_provided */
3871 0, /* properties_destroyed */
3872 0, /* todo_flags_start */
3873 0, /* todo_flags_finish */
/* The if-conversion pass object: wraps pass_data_if_conversion and
   implements the gate () / execute () hooks below.  */
3876 class pass_if_conversion
: public gimple_opt_pass
3879 pass_if_conversion (gcc::context
*ctxt
)
3880 : gimple_opt_pass (pass_data_if_conversion
, ctxt
)
3883 /* opt_pass methods: */
3884 bool gate (function
*) final override
;
3885 unsigned int execute (function
*) final override
;
3887 }; // class pass_if_conversion
/* Gate: run the pass when loop vectorization is enabled (globally or
   via force_vectorize on some loop in FUN) and -ftree-loop-if-convert
   was not explicitly disabled (!= 0), or when the user explicitly
   requested it (== 1).  */
3890 pass_if_conversion::gate (function
*fun
)
3892 return (((flag_tree_loop_vectorize
|| fun
->has_force_vectorize_loops
)
3893 && flag_tree_loop_if_convert
!= 0)
3894 || flag_tree_loop_if_convert
== 1);
/* Execute: run tree_if_conversion on every candidate loop in FUN,
   then perform deferred IL cleanup and fold away .LOOP_VECTORIZED
   guards whose loops vanished.  Returns accumulated todo flags
   (declaration of TODO is elided in this extraction).  */
3898 pass_if_conversion::execute (function
*fun
)
/* number_of_loops counts the root "loop"; <= 1 means no real loops.  */
3902 if (number_of_loops (fun
) <= 1)
/* PREDS collects the .LOOP_VECTORIZED calls created when loops are
   versioned, for post-processing below.  */
3905 auto_vec
<gimple
*> preds
;
3906 for (auto loop
: loops_list (cfun
, 0))
3907 if (flag_tree_loop_if_convert
== 1
3908 || ((flag_tree_loop_vectorize
|| loop
->force_vectorize
)
3909 && !loop
->dont_vectorize
))
3910 todo
|= tree_if_conversion (loop
, &preds
);
/* If-conversion may change trip counts; drop stale estimates.  */
3914 free_numbers_of_iterations_estimates (fun
);
/* Checking-only sanity: no BB may leak its aux field out of the pass
   (the #if/#endif guards around this are elided).  */
3921 FOR_EACH_BB_FN (bb
, fun
)
3922 gcc_assert (!bb
->aux
);
3925 /* Perform IL update now, it might elide some loops.  */
3926 if (todo
& TODO_cleanup_cfg
)
3928 cleanup_tree_cfg ();
3929 if (need_ssa_update_p (fun
))
3930 todo
|= TODO_update_ssa
;
3932 if (todo
& TODO_update_ssa_any
)
3933 update_ssa (todo
& TODO_update_ssa_any
);
3935 /* If if-conversion elided the loop fall back to the original one.  */
3936 for (unsigned i
= 0; i
< preds
.length (); ++i
)
3938 gimple
*g
= preds
[i
];
/* Arguments 0/1 of .LOOP_VECTORIZED are the loop numbers of the
   if-converted and the original copy, respectively.  */
3941 unsigned ifcvt_loop
= tree_to_uhwi (gimple_call_arg (g
, 0));
3942 unsigned orig_loop
= tree_to_uhwi (gimple_call_arg (g
, 1));
3943 if (!get_loop (fun
, ifcvt_loop
) || !get_loop (fun
, orig_loop
))
3946 fprintf (dump_file
, "If-converted loop vanished\n");
/* Resolve the guard to false so only the original copy survives.  */
3947 fold_loop_internal_call (g
, boolean_false_node
);
/* Standard pass-manager factory: heap-allocates the pass object; the
   pass manager takes ownership.  (Return-type line and closing brace
   are outside this extraction's view.)  */
3957 make_pass_if_conversion (gcc::context
*ctxt
)
3959 return new pass_if_conversion (ctxt
);