1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
43 # i_23 = PHI <0(0), i_18(10)>;
46 if (j_15 > 41) goto <L1>; else goto <L17>;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
67 # i_23 = PHI <0(0), i_18(10)>;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
85 #include "coretypes.h"
91 #include "tree-pass.h"
95 #include "optabs-tree.h"
96 #include "gimple-pretty-print.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
115 #include "tree-hash-traits.h"
117 #include "builtins.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
128 /* For lang_hooks.types.type_for_mode. */
129 #include "langhooks.h"
/* Only handle PHIs with no more arguments unless we are asked to by
   simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any ifcvting to be done.  */
static bool need_to_ifcvt;
159 /* Hash for struct innermost_loop_behavior. It depends on the user to
162 struct innermost_loop_behavior_hash
: nofree_ptr_hash
<innermost_loop_behavior
>
164 static inline hashval_t
hash (const value_type
&);
165 static inline bool equal (const value_type
&,
166 const compare_type
&);
170 innermost_loop_behavior_hash::hash (const value_type
&e
)
174 hash
= iterative_hash_expr (e
->base_address
, 0);
175 hash
= iterative_hash_expr (e
->offset
, hash
);
176 hash
= iterative_hash_expr (e
->init
, hash
);
177 return iterative_hash_expr (e
->step
, hash
);
181 innermost_loop_behavior_hash::equal (const value_type
&e1
,
182 const compare_type
&e2
)
184 if ((e1
->base_address
&& !e2
->base_address
)
185 || (!e1
->base_address
&& e2
->base_address
)
186 || (!e1
->offset
&& e2
->offset
)
187 || (e1
->offset
&& !e2
->offset
)
188 || (!e1
->init
&& e2
->init
)
189 || (e1
->init
&& !e2
->init
)
190 || (!e1
->step
&& e2
->step
)
191 || (e1
->step
&& !e2
->step
))
194 if (e1
->base_address
&& e2
->base_address
195 && !operand_equal_p (e1
->base_address
, e2
->base_address
, 0))
197 if (e1
->offset
&& e2
->offset
198 && !operand_equal_p (e1
->offset
, e2
->offset
, 0))
200 if (e1
->init
&& e2
->init
201 && !operand_equal_p (e1
->init
, e2
->init
, 0))
203 if (e1
->step
&& e2
->step
204 && !operand_equal_p (e1
->step
, e2
->step
, 0))
210 /* List of basic blocks in if-conversion-suitable order. */
211 static basic_block
*ifc_bbs
;
213 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
214 static hash_map
<innermost_loop_behavior_hash
,
215 data_reference_p
> *innermost_DR_map
;
217 /* Hash table to store <base reference, DR> pairs. */
218 static hash_map
<tree_operand_hash
, data_reference_p
> *baseref_DR_map
;
220 /* List of redundant SSA names: the first should be replaced by the second. */
221 static vec
< std::pair
<tree
, tree
> > redundant_ssa_names
;
223 /* Structure used to predicate basic blocks. This is attached to the
224 ->aux field of the BBs in the loop to be if-converted. */
225 struct bb_predicate
{
227 /* The condition under which this basic block is executed. */
230 /* PREDICATE is gimplified, and the sequence of statements is
231 recorded here, in order to avoid the duplication of computations
232 that occur in previous conditions. See PR44483. */
233 gimple_seq predicate_gimplified_stmts
;
235 /* Records the number of statements recorded into
236 PREDICATE_GIMPLIFIED_STMTS. */
237 unsigned no_predicate_stmts
;
240 /* Returns true when the basic block BB has a predicate. */
243 bb_has_predicate (basic_block bb
)
245 return bb
->aux
!= NULL
;
248 /* Returns the gimplified predicate for basic block BB. */
251 bb_predicate (basic_block bb
)
253 return ((struct bb_predicate
*) bb
->aux
)->predicate
;
256 /* Sets the gimplified predicate COND for basic block BB. */
259 set_bb_predicate (basic_block bb
, tree cond
)
261 auto aux
= (struct bb_predicate
*) bb
->aux
;
262 gcc_assert ((TREE_CODE (cond
) == TRUTH_NOT_EXPR
263 && is_gimple_val (TREE_OPERAND (cond
, 0)))
264 || is_gimple_val (cond
));
265 aux
->predicate
= cond
;
266 aux
->no_predicate_stmts
++;
268 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
269 fprintf (dump_file
, "Recording block %d value %d\n", bb
->index
,
270 aux
->no_predicate_stmts
);
273 /* Returns the sequence of statements of the gimplification of the
274 predicate for basic block BB. */
276 static inline gimple_seq
277 bb_predicate_gimplified_stmts (basic_block bb
)
279 return ((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
;
282 /* Sets the sequence of statements STMTS of the gimplification of the
283 predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
287 set_bb_predicate_gimplified_stmts (basic_block bb
, gimple_seq stmts
,
288 bool preserve_counts
)
290 ((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
= stmts
;
291 if (stmts
== NULL
&& !preserve_counts
)
292 ((struct bb_predicate
*) bb
->aux
)->no_predicate_stmts
= 0;
295 /* Adds the sequence of statements STMTS to the sequence of statements
296 of the predicate for basic block BB. */
299 add_bb_predicate_gimplified_stmts (basic_block bb
, gimple_seq stmts
)
301 /* We might have updated some stmts in STMTS via force_gimple_operand
302 calling fold_stmt and that producing multiple stmts. Delink immediate
303 uses so update_ssa after loop versioning doesn't get confused for
304 the not yet inserted predicates.
305 ??? This should go away once we reliably avoid updating stmts
307 for (gimple_stmt_iterator gsi
= gsi_start (stmts
);
308 !gsi_end_p (gsi
); gsi_next (&gsi
))
310 gimple
*stmt
= gsi_stmt (gsi
);
311 delink_stmt_imm_use (stmt
);
312 gimple_set_modified (stmt
, true);
313 ((struct bb_predicate
*) bb
->aux
)->no_predicate_stmts
++;
315 gimple_seq_add_seq_without_update
316 (&(((struct bb_predicate
*) bb
->aux
)->predicate_gimplified_stmts
), stmts
);
319 /* Return the number of statements the predicate of the basic block consists
322 static inline unsigned
323 get_bb_num_predicate_stmts (basic_block bb
)
325 return ((struct bb_predicate
*) bb
->aux
)->no_predicate_stmts
;
328 /* Initializes to TRUE the predicate of basic block BB. */
331 init_bb_predicate (basic_block bb
)
333 bb
->aux
= XNEW (struct bb_predicate
);
334 set_bb_predicate_gimplified_stmts (bb
, NULL
, false);
335 set_bb_predicate (bb
, boolean_true_node
);
338 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
341 release_bb_predicate (basic_block bb
)
343 gimple_seq stmts
= bb_predicate_gimplified_stmts (bb
);
346 /* Ensure that these stmts haven't yet been added to a bb. */
348 for (gimple_stmt_iterator i
= gsi_start (stmts
);
349 !gsi_end_p (i
); gsi_next (&i
))
350 gcc_assert (! gimple_bb (gsi_stmt (i
)));
353 gimple_seq_discard (stmts
);
354 set_bb_predicate_gimplified_stmts (bb
, NULL
, false);
358 /* Free the predicate of basic block BB. */
361 free_bb_predicate (basic_block bb
)
363 if (!bb_has_predicate (bb
))
366 release_bb_predicate (bb
);
371 /* Reinitialize predicate of BB with the true predicate. */
374 reset_bb_predicate (basic_block bb
)
376 if (!bb_has_predicate (bb
))
377 init_bb_predicate (bb
);
380 release_bb_predicate (bb
);
381 set_bb_predicate (bb
, boolean_true_node
);
385 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
386 the expression EXPR. Inserts the statement created for this
387 computation before GSI and leaves the iterator GSI at the same
391 ifc_temp_var (tree type
, tree expr
, gimple_stmt_iterator
*gsi
)
393 tree new_name
= make_temp_ssa_name (type
, NULL
, "_ifc_");
394 gimple
*stmt
= gimple_build_assign (new_name
, expr
);
395 gimple_set_vuse (stmt
, gimple_vuse (gsi_stmt (*gsi
)));
396 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
400 /* Return true when COND is a false predicate. */
403 is_false_predicate (tree cond
)
405 return (cond
!= NULL_TREE
406 && (cond
== boolean_false_node
407 || integer_zerop (cond
)));
410 /* Return true when COND is a true predicate. */
413 is_true_predicate (tree cond
)
415 return (cond
== NULL_TREE
416 || cond
== boolean_true_node
417 || integer_onep (cond
));
420 /* Returns true when BB has a predicate that is not trivial: true or
424 is_predicated (basic_block bb
)
426 return !is_true_predicate (bb_predicate (bb
));
429 /* Parses the predicate COND and returns its comparison code and
430 operands OP0 and OP1. */
432 static enum tree_code
433 parse_predicate (tree cond
, tree
*op0
, tree
*op1
)
437 if (TREE_CODE (cond
) == SSA_NAME
438 && is_gimple_assign (s
= SSA_NAME_DEF_STMT (cond
)))
440 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s
)) == tcc_comparison
)
442 *op0
= gimple_assign_rhs1 (s
);
443 *op1
= gimple_assign_rhs2 (s
);
444 return gimple_assign_rhs_code (s
);
447 else if (gimple_assign_rhs_code (s
) == TRUTH_NOT_EXPR
)
449 tree op
= gimple_assign_rhs1 (s
);
450 tree type
= TREE_TYPE (op
);
451 enum tree_code code
= parse_predicate (op
, op0
, op1
);
453 return code
== ERROR_MARK
? ERROR_MARK
454 : invert_tree_comparison (code
, HONOR_NANS (type
));
460 if (COMPARISON_CLASS_P (cond
))
462 *op0
= TREE_OPERAND (cond
, 0);
463 *op1
= TREE_OPERAND (cond
, 1);
464 return TREE_CODE (cond
);
470 /* Returns the fold of predicate C1 OR C2 at location LOC. */
473 fold_or_predicates (location_t loc
, tree c1
, tree c2
)
475 tree op1a
, op1b
, op2a
, op2b
;
476 enum tree_code code1
= parse_predicate (c1
, &op1a
, &op1b
);
477 enum tree_code code2
= parse_predicate (c2
, &op2a
, &op2b
);
479 if (code1
!= ERROR_MARK
&& code2
!= ERROR_MARK
)
481 tree t
= maybe_fold_or_comparisons (boolean_type_node
, code1
, op1a
, op1b
,
487 return fold_build2_loc (loc
, TRUTH_OR_EXPR
, boolean_type_node
, c1
, c2
);
490 /* Returns either a COND_EXPR or the folded expression if the folded
491 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
492 a constant or a SSA_NAME. */
495 fold_build_cond_expr (tree type
, tree cond
, tree rhs
, tree lhs
)
497 /* If COND is comparison r != 0 and r has boolean type, convert COND
498 to SSA_NAME to accept by vect bool pattern. */
499 if (TREE_CODE (cond
) == NE_EXPR
)
501 tree op0
= TREE_OPERAND (cond
, 0);
502 tree op1
= TREE_OPERAND (cond
, 1);
503 if (TREE_CODE (op0
) == SSA_NAME
504 && TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
505 && (integer_zerop (op1
)))
509 gimple_match_op
cexpr (gimple_match_cond::UNCOND
, COND_EXPR
,
510 type
, cond
, rhs
, lhs
);
511 if (cexpr
.resimplify (NULL
, follow_all_ssa_edges
))
513 if (gimple_simplified_result_is_gimple_val (&cexpr
))
515 else if (cexpr
.code
== ABS_EXPR
)
516 return build1 (ABS_EXPR
, type
, cexpr
.ops
[0]);
517 else if (cexpr
.code
== MIN_EXPR
518 || cexpr
.code
== MAX_EXPR
)
519 return build2 ((tree_code
)cexpr
.code
, type
, cexpr
.ops
[0], cexpr
.ops
[1]);
522 return build3 (COND_EXPR
, type
, cond
, rhs
, lhs
);
525 /* Add condition NC to the predicate list of basic block BB. LOOP is
526 the loop to be if-converted. Use predicate of cd-equivalent block
527 for join bb if it exists: we call basic blocks bb1 and bb2
528 cd-equivalent if they are executed under the same condition. */
531 add_to_predicate_list (class loop
*loop
, basic_block bb
, tree nc
)
536 if (is_true_predicate (nc
))
539 /* If dominance tells us this basic block is always executed,
540 don't record any predicates for it. */
541 if (dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
544 dom_bb
= get_immediate_dominator (CDI_DOMINATORS
, bb
);
545 /* We use notion of cd equivalence to get simpler predicate for
546 join block, e.g. if join block has 2 predecessors with predicates
547 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
548 p1 & p2 | p1 & !p2. */
549 if (dom_bb
!= loop
->header
550 && get_immediate_dominator (CDI_POST_DOMINATORS
, dom_bb
) == bb
)
552 gcc_assert (flow_bb_inside_loop_p (loop
, dom_bb
));
553 bc
= bb_predicate (dom_bb
);
554 if (!is_true_predicate (bc
))
555 set_bb_predicate (bb
, bc
);
557 gcc_assert (is_true_predicate (bb_predicate (bb
)));
558 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
559 fprintf (dump_file
, "Use predicate of bb#%d for bb#%d\n",
560 dom_bb
->index
, bb
->index
);
564 if (!is_predicated (bb
))
568 bc
= bb_predicate (bb
);
569 bc
= fold_or_predicates (EXPR_LOCATION (bc
), nc
, bc
);
570 if (is_true_predicate (bc
))
572 reset_bb_predicate (bb
);
577 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
578 if (TREE_CODE (bc
) == TRUTH_NOT_EXPR
)
579 tp
= &TREE_OPERAND (bc
, 0);
582 if (!is_gimple_val (*tp
))
585 *tp
= force_gimple_operand (*tp
, &stmts
, true, NULL_TREE
);
586 add_bb_predicate_gimplified_stmts (bb
, stmts
);
588 set_bb_predicate (bb
, bc
);
591 /* Add the condition COND to the previous condition PREV_COND, and add
592 this to the predicate list of the destination of edge E. LOOP is
593 the loop to be if-converted. */
596 add_to_dst_predicate_list (class loop
*loop
, edge e
,
597 tree prev_cond
, tree cond
)
599 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
602 if (!is_true_predicate (prev_cond
))
603 cond
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
606 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, e
->dest
))
607 add_to_predicate_list (loop
, e
->dest
, cond
);
610 /* Return true if one of the successor edges of BB exits LOOP. */
613 bb_with_exit_edge_p (const class loop
*loop
, basic_block bb
)
618 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
619 if (loop_exit_edge_p (loop
, e
))
625 /* Given PHI which has more than two arguments, this function checks if
626 it's if-convertible by degenerating its arguments. Specifically, if
627 below two conditions are satisfied:
629 1) Number of PHI arguments with different values equals to 2 and one
630 argument has the only occurrence.
631 2) The edge corresponding to the unique argument isn't critical edge.
633 Such PHI can be handled as PHIs have only two arguments. For example,
636 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
638 can be transformed into:
640 res = (predicate of e3) ? A_2 : A_1;
642 Return TRUE if it is the case, FALSE otherwise. */
645 phi_convertible_by_degenerating_args (gphi
*phi
)
648 tree arg
, t1
= NULL
, t2
= NULL
;
649 unsigned int i
, i1
= 0, i2
= 0, n1
= 0, n2
= 0;
650 unsigned int num_args
= gimple_phi_num_args (phi
);
652 gcc_assert (num_args
> 2);
654 for (i
= 0; i
< num_args
; i
++)
656 arg
= gimple_phi_arg_def (phi
, i
);
657 if (t1
== NULL
|| operand_equal_p (t1
, arg
, 0))
663 else if (t2
== NULL
|| operand_equal_p (t2
, arg
, 0))
673 if (n1
!= 1 && n2
!= 1)
676 /* Check if the edge corresponding to the unique arg is critical. */
677 e
= gimple_phi_arg_edge (phi
, (n1
== 1) ? i1
: i2
);
678 if (EDGE_COUNT (e
->src
->succs
) > 1)
684 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
685 and it belongs to basic block BB. Note at this point, it is sure
686 that PHI is if-convertible. This function updates global variable
687 ANY_COMPLICATED_PHI if PHI is complicated. */
690 if_convertible_phi_p (class loop
*loop
, basic_block bb
, gphi
*phi
)
692 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
694 fprintf (dump_file
, "-------------------------\n");
695 print_gimple_stmt (dump_file
, phi
, 0, TDF_SLIM
);
698 if (bb
!= loop
->header
699 && gimple_phi_num_args (phi
) > 2
700 && !phi_convertible_by_degenerating_args (phi
))
701 any_complicated_phi
= true;
706 /* Records the status of a data reference. This struct is attached to
707 each DR->aux field. */
710 bool rw_unconditionally
;
711 bool w_unconditionally
;
712 bool written_at_least_once
;
716 tree base_w_predicate
;
719 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
720 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
721 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
722 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
724 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
725 HASH tables. While storing them in HASH table, it checks if the
726 reference is unconditionally read or written and stores that as a flag
727 information. For base reference it checks if it is written atlest once
728 unconditionally and stores it as flag information along with DR.
729 In other words for every data reference A in STMT there exist other
730 accesses to a data reference with the same base with predicates that
731 add up (OR-up) to the true predicate: this ensures that the data
732 reference A is touched (read or written) on every iteration of the
733 if-converted loop. */
735 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a
)
738 data_reference_p
*master_dr
, *base_master_dr
;
739 tree base_ref
= DR_BASE_OBJECT (a
);
740 innermost_loop_behavior
*innermost
= &DR_INNERMOST (a
);
741 tree ca
= bb_predicate (gimple_bb (DR_STMT (a
)));
744 master_dr
= &innermost_DR_map
->get_or_insert (innermost
, &exist1
);
750 IFC_DR (*master_dr
)->w_predicate
751 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
752 IFC_DR (*master_dr
)->w_predicate
);
753 if (is_true_predicate (IFC_DR (*master_dr
)->w_predicate
))
754 DR_W_UNCONDITIONALLY (*master_dr
) = true;
756 IFC_DR (*master_dr
)->rw_predicate
757 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
758 IFC_DR (*master_dr
)->rw_predicate
);
759 if (is_true_predicate (IFC_DR (*master_dr
)->rw_predicate
))
760 DR_RW_UNCONDITIONALLY (*master_dr
) = true;
764 base_master_dr
= &baseref_DR_map
->get_or_insert (base_ref
, &exist2
);
767 IFC_DR (*base_master_dr
)->base_w_predicate
768 = fold_or_predicates (UNKNOWN_LOCATION
, ca
,
769 IFC_DR (*base_master_dr
)->base_w_predicate
);
770 if (is_true_predicate (IFC_DR (*base_master_dr
)->base_w_predicate
))
771 DR_BASE_W_UNCONDITIONALLY (*base_master_dr
) = true;
775 /* Return TRUE if can prove the index IDX of an array reference REF is
776 within array bound. Return false otherwise. */
779 idx_within_array_bound (tree ref
, tree
*idx
, void *dta
)
781 wi::overflow_type overflow
;
782 widest_int niter
, valid_niter
, delta
, wi_step
;
785 class loop
*loop
= (class loop
*) dta
;
787 /* Only support within-bound access for array references. */
788 if (TREE_CODE (ref
) != ARRAY_REF
)
791 /* For arrays that might have flexible sizes, it is not guaranteed that they
792 do not extend over their declared size. */
793 if (array_ref_flexible_size_p (ref
))
796 ev
= analyze_scalar_evolution (loop
, *idx
);
797 ev
= instantiate_parameters (loop
, ev
);
798 init
= initial_condition (ev
);
799 step
= evolution_part_in_loop_num (ev
, loop
->num
);
801 if (!init
|| TREE_CODE (init
) != INTEGER_CST
802 || (step
&& TREE_CODE (step
) != INTEGER_CST
))
805 low
= array_ref_low_bound (ref
);
806 high
= array_ref_up_bound (ref
);
808 /* The case of nonconstant bounds could be handled, but it would be
810 if (TREE_CODE (low
) != INTEGER_CST
811 || !high
|| TREE_CODE (high
) != INTEGER_CST
)
814 /* Check if the intial idx is within bound. */
815 if (wi::to_widest (init
) < wi::to_widest (low
)
816 || wi::to_widest (init
) > wi::to_widest (high
))
819 /* The idx is always within bound. */
820 if (!step
|| integer_zerop (step
))
823 if (!max_loop_iterations (loop
, &niter
))
826 if (wi::to_widest (step
) < 0)
828 delta
= wi::to_widest (init
) - wi::to_widest (low
);
829 wi_step
= -wi::to_widest (step
);
833 delta
= wi::to_widest (high
) - wi::to_widest (init
);
834 wi_step
= wi::to_widest (step
);
837 valid_niter
= wi::div_floor (delta
, wi_step
, SIGNED
, &overflow
);
838 /* The iteration space of idx is within array bound. */
839 if (!overflow
&& niter
<= valid_niter
)
845 /* Return TRUE if ref is a within bound array reference. */
848 ref_within_array_bound (gimple
*stmt
, tree ref
)
850 class loop
*loop
= loop_containing_stmt (stmt
);
852 gcc_assert (loop
!= NULL
);
853 return for_each_index (&ref
, idx_within_array_bound
, loop
);
857 /* Given a memory reference expression T, return TRUE if base object
858 it refers to is writable. The base object of a memory reference
859 is the main object being referenced, which is returned by function
863 base_object_writable (tree ref
)
865 tree base_tree
= get_base_address (ref
);
868 && DECL_P (base_tree
)
869 && decl_binds_to_current_def_p (base_tree
)
870 && !TREE_READONLY (base_tree
));
873 /* Return true when the memory references of STMT won't trap in the
874 if-converted code. There are two things that we have to check for:
876 - writes to memory occur to writable memory: if-conversion of
877 memory writes transforms the conditional memory writes into
878 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
879 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
880 be executed at all in the original code, it may be a readonly
881 memory. To check that A is not const-qualified, we check that
882 there exists at least an unconditional write to A in the current
885 - reads or writes to memory are valid memory accesses for every
886 iteration. To check that the memory accesses are correctly formed
887 and that we are allowed to read and write in these locations, we
888 check that the memory accesses to be if-converted occur at every
889 iteration unconditionally.
891 Returns true for the memory reference in STMT, same memory reference
892 is read or written unconditionally atleast once and the base memory
893 reference is written unconditionally once. This is to check reference
894 will not write fault. Also retuns true if the memory reference is
895 unconditionally read once then we are conditionally writing to memory
896 which is defined as read and write and is bound to the definition
899 ifcvt_memrefs_wont_trap (gimple
*stmt
, vec
<data_reference_p
> drs
)
901 /* If DR didn't see a reference here we can't use it to tell
902 whether the ref traps or not. */
903 if (gimple_uid (stmt
) == 0)
906 data_reference_p
*master_dr
, *base_master_dr
;
907 data_reference_p a
= drs
[gimple_uid (stmt
) - 1];
909 tree base
= DR_BASE_OBJECT (a
);
910 innermost_loop_behavior
*innermost
= &DR_INNERMOST (a
);
912 gcc_assert (DR_STMT (a
) == stmt
);
913 gcc_assert (DR_BASE_ADDRESS (a
) || DR_OFFSET (a
)
914 || DR_INIT (a
) || DR_STEP (a
));
916 master_dr
= innermost_DR_map
->get (innermost
);
917 gcc_assert (master_dr
!= NULL
);
919 base_master_dr
= baseref_DR_map
->get (base
);
921 /* If a is unconditionally written to it doesn't trap. */
922 if (DR_W_UNCONDITIONALLY (*master_dr
))
925 /* If a is unconditionally accessed then ...
927 Even a is conditional access, we can treat it as an unconditional
928 one if it's an array reference and all its index are within array
930 if (DR_RW_UNCONDITIONALLY (*master_dr
)
931 || ref_within_array_bound (stmt
, DR_REF (a
)))
933 /* an unconditional read won't trap. */
937 /* an unconditionaly write won't trap if the base is written
938 to unconditionally. */
940 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr
))
941 return flag_store_data_races
;
942 /* or the base is known to be not readonly. */
943 else if (base_object_writable (DR_REF (a
)))
944 return flag_store_data_races
;
950 /* Return true if STMT could be converted into a masked load or store
951 (conditional load or store based on a mask computed from bb predicate). */
954 ifcvt_can_use_mask_load_store (gimple
*stmt
)
956 /* Check whether this is a load or store. */
957 tree lhs
= gimple_assign_lhs (stmt
);
960 if (gimple_store_p (stmt
))
962 if (!is_gimple_val (gimple_assign_rhs1 (stmt
)))
967 else if (gimple_assign_load_p (stmt
))
970 ref
= gimple_assign_rhs1 (stmt
);
975 if (may_be_nonaddressable_p (ref
))
978 /* Mask should be integer mode of the same size as the load/store
980 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
981 if (!int_mode_for_mode (mode
).exists () || VECTOR_MODE_P (mode
))
984 if (can_vec_mask_load_store_p (mode
, VOIDmode
, is_load
))
990 /* Return true if STMT could be converted from an operation that is
991 unconditional to one that is conditional on a bb predicate mask. */
994 ifcvt_can_predicate (gimple
*stmt
)
996 basic_block bb
= gimple_bb (stmt
);
998 if (!(flag_tree_loop_vectorize
|| bb
->loop_father
->force_vectorize
)
999 || bb
->loop_father
->dont_vectorize
1000 || gimple_has_volatile_ops (stmt
))
1003 if (gimple_assign_single_p (stmt
))
1004 return ifcvt_can_use_mask_load_store (stmt
);
1006 tree_code code
= gimple_assign_rhs_code (stmt
);
1007 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1008 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
1009 if (!types_compatible_p (lhs_type
, rhs_type
))
1011 internal_fn cond_fn
= get_conditional_internal_fn (code
);
1012 return (cond_fn
!= IFN_LAST
1013 && vectorized_internal_fn_supported_p (cond_fn
, lhs_type
));
1016 /* Return true when STMT is if-convertible.
1018 GIMPLE_ASSIGN statement is not if-convertible if,
1019 - it is not movable,
1021 - LHS is not var decl. */
1024 if_convertible_gimple_assign_stmt_p (gimple
*stmt
,
1025 vec
<data_reference_p
> refs
)
1027 tree lhs
= gimple_assign_lhs (stmt
);
1029 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1031 fprintf (dump_file
, "-------------------------\n");
1032 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
1035 if (!is_gimple_reg_type (TREE_TYPE (lhs
)))
1038 /* Some of these constrains might be too conservative. */
1039 if (stmt_ends_bb_p (stmt
)
1040 || gimple_has_volatile_ops (stmt
)
1041 || (TREE_CODE (lhs
) == SSA_NAME
1042 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
1043 || gimple_has_side_effects (stmt
))
1045 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1046 fprintf (dump_file
, "stmt not suitable for ifcvt\n");
1050 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1051 in between if_convertible_loop_p and combine_blocks
1052 we can perform loop versioning. */
1053 gimple_set_plf (stmt
, GF_PLF_2
, false);
1055 if ((! gimple_vuse (stmt
)
1056 || gimple_could_trap_p_1 (stmt
, false, false)
1057 || ! ifcvt_memrefs_wont_trap (stmt
, refs
))
1058 && gimple_could_trap_p (stmt
))
1060 if (ifcvt_can_predicate (stmt
))
1062 gimple_set_plf (stmt
, GF_PLF_2
, true);
1063 need_to_predicate
= true;
1066 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1067 fprintf (dump_file
, "tree could trap...\n");
1070 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
1071 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
1072 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
))
1073 && arith_code_with_undefined_signed_overflow
1074 (gimple_assign_rhs_code (stmt
)))
1075 /* We have to rewrite stmts with undefined overflow. */
1076 need_to_rewrite_undefined
= true;
1078 /* When if-converting stores force versioning, likewise if we
1079 ended up generating store data races. */
1080 if (gimple_vdef (stmt
))
1081 need_to_predicate
= true;
1086 /* Return true when STMT is if-convertible.
1088 A statement is if-convertible if:
1089 - it is an if-convertible GIMPLE_ASSIGN,
1090 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1091 - it is builtins call,
1092 - it is a call to a function with a SIMD clone. */
1095 if_convertible_stmt_p (gimple
*stmt
, vec
<data_reference_p
> refs
)
1097 switch (gimple_code (stmt
))
1105 return if_convertible_gimple_assign_stmt_p (stmt
, refs
);
1109 /* There are some IFN_s that are used to replace builtins but have the
1110 same semantics. Even if MASK_CALL cannot handle them vectorable_call
1111 will insert the proper selection, so do not block conversion. */
1112 int flags
= gimple_call_flags (stmt
);
1113 if ((flags
& ECF_CONST
)
1114 && !(flags
& ECF_LOOPING_CONST_OR_PURE
)
1115 && gimple_call_combined_fn (stmt
) != CFN_LAST
)
1118 tree fndecl
= gimple_call_fndecl (stmt
);
1121 /* We can vectorize some builtins and functions with SIMD
1122 "inbranch" clones. */
1123 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
1124 if (node
&& node
->simd_clones
!= NULL
)
1125 /* Ensure that at least one clone can be "inbranch". */
1126 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
1127 n
= n
->simdclone
->next_clone
)
1128 if (n
->simdclone
->inbranch
)
1130 gimple_set_plf (stmt
, GF_PLF_2
, true);
1131 need_to_predicate
= true;
1140 /* Don't know what to do with 'em so don't do anything. */
1141 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1143 fprintf (dump_file
, "don't know what to do\n");
1144 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
1150 /* Assumes that BB has more than 1 predecessors.
1151 Returns false if at least one successor is not on critical edge
1152 and true otherwise. */
1155 all_preds_critical_p (basic_block bb
)
1160 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1161 if (EDGE_COUNT (e
->src
->succs
) == 1)
/* NOTE(review): extraction-damaged fragment — braces, `return' statements
   and some condition lines are missing; code left byte-identical, only
   comments added.  Compare against upstream GCC tree-if-conv.cc before
   editing.  */
1166 /* Return true when BB is if-convertible. This routine does not check
1167 basic block's statements and phis.
1169 A basic block is not if-convertible if:
1170 - it is non-empty and it is after the exit block (in BFS order),
1171 - it is after the exit block but before the latch,
1172 - its edges are not normal.
1174 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1178 if_convertible_bb_p (class loop
*loop
, basic_block bb
, basic_block exit_bb
)
1183 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1184 fprintf (dump_file
, "----------[%d]-------------\n", bb
->index
);
/* More than two successors cannot come from a simple condition.  */
1186 if (EDGE_COUNT (bb
->succs
) > 2)
/* A control-altering call (e.g. noreturn/setjmp-like) ends the BB in a way
   if-conversion cannot predicate.  */
1189 if (gcall
*call
= safe_dyn_cast
<gcall
*> (*gsi_last_bb (bb
)))
1190 if (gimple_call_ctrl_altering_p (call
))
/* Checks relative to the already-seen exit block (EXIT_BB non-NULL,
   presumably — the guard is in a missing line).  */
1195 if (bb
!= loop
->latch
)
1197 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1198 fprintf (dump_file
, "basic block after exit bb but before latch\n");
1201 else if (!empty_block_p (bb
))
1203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1204 fprintf (dump_file
, "non empty basic block after exit bb\n");
/* NOTE(review): original line 1208 is missing here (upstream has an
   additional `bb != exit_bb' conjunct) — confirm against GCC sources.  */
1207 else if (bb
== loop
->latch
1209 && !dominated_by_p (CDI_DOMINATORS
, bb
, exit_bb
))
1211 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1212 fprintf (dump_file
, "latch is not dominated by exit_block\n");
1217 /* Be less adventurous and handle only normal edges. */
1218 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1219 if (e
->flags
& (EDGE_EH
| EDGE_ABNORMAL
| EDGE_IRREDUCIBLE_LOOP
))
1221 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1222 fprintf (dump_file
, "Difficult to handle edges\n");
1229 /* Return true when all predecessor blocks of BB are visited. The
1230 VISITED bitmap keeps track of the visited blocks. */
1233 pred_blocks_visited_p (basic_block bb
, bitmap
*visited
)
1237 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1238 if (!bitmap_bit_p (*visited
, e
->src
->index
))
/* NOTE(review): extraction-damaged fragment — braces, `return NULL'
   early-exit lines, switch case labels and increments are missing; code
   left byte-identical, only comments added.  */
1244 /* Get body of a LOOP in suitable order for if-conversion. It is
1245 caller's responsibility to deallocate basic block list.
1246 If-conversion suitable order is, breadth first sort (BFS) order
1247 with an additional constraint: select a block only if all its
1248 predecessors are already selected. */
1250 static basic_block
*
1251 get_loop_body_in_if_conv_order (const class loop
*loop
)
1253 basic_block
*blocks
, *blocks_in_bfs_order
;
1256 unsigned int index
= 0;
1257 unsigned int visited_count
= 0;
1259 gcc_assert (loop
->num_nodes
);
1260 gcc_assert (loop
->latch
!= EXIT_BLOCK_PTR_FOR_FN (cfun
));
/* BLOCKS holds the result in if-conversion order; VISITED tracks which
   BFS-order blocks have already been emitted into BLOCKS.  */
1262 blocks
= XCNEWVEC (basic_block
, loop
->num_nodes
);
1263 visited
= BITMAP_ALLOC (NULL
);
1265 blocks_in_bfs_order
= get_loop_body_in_bfs_order (loop
);
1268 while (index
< loop
->num_nodes
)
1270 bb
= blocks_in_bfs_order
[index
];
/* Irreducible regions make the pass bail out (cleanup + early return in
   missing lines).  */
1272 if (bb
->flags
& BB_IRREDUCIBLE_LOOP
)
1274 free (blocks_in_bfs_order
);
1275 BITMAP_FREE (visited
);
/* Emit BB only once all its predecessors were emitted (or it is the
   header); otherwise it is revisited on a later sweep.  */
1280 if (!bitmap_bit_p (visited
, bb
->index
))
1282 if (pred_blocks_visited_p (bb
, &visited
)
1283 || bb
== loop
->header
)
1285 /* This block is now visited. */
1286 bitmap_set_bit (visited
, bb
->index
);
1287 blocks
[visited_count
++] = bb
;
/* If a full sweep emitted nothing new, the constraint cannot be met.  */
1293 if (index
== loop
->num_nodes
1294 && visited_count
!= loop
->num_nodes
)
1298 free (blocks_in_bfs_order
);
1299 BITMAP_FREE (visited
);
1301 /* Go through loop and reject if-conversion or lowering of bitfields if we
1302 encounter statements we do not believe the vectorizer will be able to
1303 handle. If adding a new type of statement here, make sure
1304 'ifcvt_local_dce' is also able to handle it propertly. */
1305 for (index
= 0; index
< loop
->num_nodes
; index
++)
1307 basic_block bb
= blocks
[index
];
1308 gimple_stmt_iterator gsi
;
1310 bool may_have_nonlocal_labels
1311 = bb_with_exit_edge_p (loop
, bb
) || bb
== loop
->latch
;
1312 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1313 switch (gimple_code (gsi_stmt (gsi
)))
/* GIMPLE_LABEL case: nonlocal/forced labels block if-conversion.  */
1316 if (!may_have_nonlocal_labels
)
1319 = gimple_label_label (as_a
<glabel
*> (gsi_stmt (gsi
)));
1320 if (DECL_NONLOCAL (label
) || FORCED_LABEL (label
))
/* Default case: clear stmt uids, which later phases reuse.  */
1331 gimple_set_uid (gsi_stmt (gsi
), 0);
/* NOTE(review): extraction-damaged fragment — braces, `continue'/return
   lines and some expression tails are missing; code left byte-identical,
   only comments added.  */
1341 /* Returns true when the analysis of the predicates for all the basic
1342 blocks in LOOP succeeded.
1344 predicate_bbs first allocates the predicates of the basic blocks.
1345 These fields are then initialized with the tree expressions
1346 representing the predicates under which a basic block is executed
1347 in the LOOP. As the loop->header is executed at each iteration, it
1348 has the "true" predicate. Other statements executed under a
1349 condition are predicated with that condition, for example
1356 S1 will be predicated with "x", and
1357 S2 will be predicated with "!x". */
1360 predicate_bbs (loop_p loop
)
1364 for (i
= 0; i
< loop
->num_nodes
; i
++)
1365 init_bb_predicate (ifc_bbs
[i
]);
1367 for (i
= 0; i
< loop
->num_nodes
; i
++)
1369 basic_block bb
= ifc_bbs
[i
];
1372 /* The loop latch and loop exit block are always executed and
1373 have no extra conditions to be processed: skip them. */
1374 if (bb
== loop
->latch
1375 || bb_with_exit_edge_p (loop
, bb
))
1377 reset_bb_predicate (bb
);
1381 cond
= bb_predicate (bb
);
/* A BB ending in a GCOND splits its predicate into the two successors.  */
1382 if (gcond
*stmt
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb
)))
1385 edge true_edge
, false_edge
;
1386 location_t loc
= gimple_location (stmt
);
1388 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1389 conditions can remain unfolded because of multiple uses so
1390 try to re-fold here, especially to get precision changing
1391 conversions sorted out. Do not simply fold the stmt since
1392 this is analysis only. When conditions were embedded in
1393 COND_EXPRs those were folded separately before folding the
1394 COND_EXPR but as they are now outside we have to make sure
1395 to fold them. Do it here - another opportunity would be to
1396 fold predicates as they are inserted. */
1397 gimple_match_op
cexpr (gimple_match_cond::UNCOND
,
1398 gimple_cond_code (stmt
),
1400 gimple_cond_lhs (stmt
),
1401 gimple_cond_rhs (stmt
));
1402 if (cexpr
.resimplify (NULL
, follow_all_ssa_edges
)
1403 && cexpr
.code
.is_tree_code ()
1404 && TREE_CODE_CLASS ((tree_code
)cexpr
.code
) == tcc_comparison
)
1405 c
= build2_loc (loc
, (tree_code
)cexpr
.code
, boolean_type_node
,
1406 cexpr
.ops
[0], cexpr
.ops
[1]);
/* Fallback (else arm, brace lines missing): rebuild the condition
   verbatim from the GCOND.  */
1408 c
= build2_loc (loc
, gimple_cond_code (stmt
),
1410 gimple_cond_lhs (stmt
),
1411 gimple_cond_rhs (stmt
));
1413 /* Add new condition into destination's predicate list. */
1414 extract_true_false_edges_from_block (gimple_bb (stmt
),
1415 &true_edge
, &false_edge
);
1417 /* If C is true, then TRUE_EDGE is taken. */
1418 add_to_dst_predicate_list (loop
, true_edge
, unshare_expr (cond
),
1421 /* If C is false, then FALSE_EDGE is taken. */
1422 c2
= build1_loc (loc
, TRUTH_NOT_EXPR
, boolean_type_node
,
1424 add_to_dst_predicate_list (loop
, false_edge
,
1425 unshare_expr (cond
), c2
);
1430 /* If current bb has only one successor, then consider it as an
1431 unconditional goto. */
1432 if (single_succ_p (bb
))
1434 basic_block bb_n
= single_succ (bb
);
1436 /* The successor bb inherits the predicate of its
1437 predecessor. If there is no predicate in the predecessor
1438 bb, then consider the successor bb as always executed. */
1439 if (cond
== NULL_TREE
)
1440 cond
= boolean_true_node
;
1442 add_to_predicate_list (loop
, bb_n
, cond
);
1446 /* The loop header is always executed. */
1447 reset_bb_predicate (loop
->header
);
1448 gcc_assert (bb_predicate_gimplified_stmts (loop
->header
) == NULL
1449 && bb_predicate_gimplified_stmts (loop
->latch
) == NULL
);
1452 /* Build region by adding loop pre-header and post-header blocks. */
1454 static vec
<basic_block
>
1455 build_region (class loop
*loop
)
1457 vec
<basic_block
> region
= vNULL
;
1458 basic_block exit_bb
= NULL
;
1460 gcc_assert (ifc_bbs
);
1461 /* The first element is loop pre-header. */
1462 region
.safe_push (loop_preheader_edge (loop
)->src
);
1464 for (unsigned int i
= 0; i
< loop
->num_nodes
; i
++)
1466 basic_block bb
= ifc_bbs
[i
];
1467 region
.safe_push (bb
);
1468 /* Find loop postheader. */
1471 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1472 if (loop_exit_edge_p (loop
, e
))
1478 /* The last element is loop post-header. */
1479 gcc_assert (exit_bb
);
1480 region
.safe_push (exit_bb
);
/* NOTE(review): extraction-damaged fragment — braces, `return false/true'
   lines and some declarations are missing; code left byte-identical, only
   comments added.  */
1484 /* Return true when LOOP is if-convertible. This is a helper function
1485 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1486 in if_convertible_loop_p. */
1489 if_convertible_loop_p_1 (class loop
*loop
, vec
<data_reference_p
> *refs
)
1492 basic_block exit_bb
= NULL
;
1493 vec
<basic_block
> region
;
1495 calculate_dominance_info (CDI_DOMINATORS
);
/* First pass: structural check of every block; record the exit block.  */
1497 for (i
= 0; i
< loop
->num_nodes
; i
++)
1499 basic_block bb
= ifc_bbs
[i
];
1501 if (!if_convertible_bb_p (loop
, bb
, exit_bb
))
1504 if (bb_with_exit_edge_p (loop
, bb
))
1508 data_reference_p dr
;
/* Allocate the two DR hash maps used to classify read/write behavior.  */
1511 = new hash_map
<innermost_loop_behavior_hash
, data_reference_p
>;
1512 baseref_DR_map
= new hash_map
<tree_operand_hash
, data_reference_p
>;
1514 /* Compute post-dominator tree locally. */
1515 region
= build_region (loop
);
1516 calculate_dominance_info_for_region (CDI_POST_DOMINATORS
, region
);
1518 predicate_bbs (loop
);
1520 /* Free post-dominator tree since it is not used after predication. */
1521 free_dominance_info_for_region (cfun
, CDI_POST_DOMINATORS
, region
);
/* Initialize per-DR if-conversion metadata and record read/write info.  */
1524 for (i
= 0; refs
->iterate (i
, &dr
); i
++)
1526 tree ref
= DR_REF (dr
);
1528 dr
->aux
= XNEW (struct ifc_dr
);
1529 DR_BASE_W_UNCONDITIONALLY (dr
) = false;
1530 DR_RW_UNCONDITIONALLY (dr
) = false;
1531 DR_W_UNCONDITIONALLY (dr
) = false;
1532 IFC_DR (dr
)->rw_predicate
= boolean_false_node
;
1533 IFC_DR (dr
)->w_predicate
= boolean_false_node
;
1534 IFC_DR (dr
)->base_w_predicate
= boolean_false_node
;
1535 if (gimple_uid (DR_STMT (dr
)) == 0)
1536 gimple_set_uid (DR_STMT (dr
), i
+ 1);
1538 /* If DR doesn't have innermost loop behavior or it's a compound
1539 memory reference, we synthesize its innermost loop behavior
1541 if (TREE_CODE (ref
) == COMPONENT_REF
1542 || TREE_CODE (ref
) == IMAGPART_EXPR
1543 || TREE_CODE (ref
) == REALPART_EXPR
1544 || !(DR_BASE_ADDRESS (dr
) || DR_OFFSET (dr
)
1545 || DR_INIT (dr
) || DR_STEP (dr
)))
1547 while (TREE_CODE (ref
) == COMPONENT_REF
1548 || TREE_CODE (ref
) == IMAGPART_EXPR
1549 || TREE_CODE (ref
) == REALPART_EXPR
)
1550 ref
= TREE_OPERAND (ref
, 0);
1552 memset (&DR_INNERMOST (dr
), 0, sizeof (DR_INNERMOST (dr
)));
1553 DR_BASE_ADDRESS (dr
) = ref
;
1555 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr
);
1558 for (i
= 0; i
< loop
->num_nodes
; i
++)
1560 basic_block bb
= ifc_bbs
[i
];
1561 gimple_stmt_iterator itr
;
1563 /* Check the if-convertibility of statements in predicated BBs. */
1564 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
1565 for (itr
= gsi_start_bb (bb
); !gsi_end_p (itr
); gsi_next (&itr
))
1566 if (!if_convertible_stmt_p (gsi_stmt (itr
), *refs
))
1570 /* Checking PHIs needs to be done after stmts, as the fact whether there
1571 are any masked loads or stores affects the tests. */
1572 for (i
= 0; i
< loop
->num_nodes
; i
++)
1574 basic_block bb
= ifc_bbs
[i
];
1577 for (itr
= gsi_start_phis (bb
); !gsi_end_p (itr
); gsi_next (&itr
))
1578 if (!if_convertible_phi_p (loop
, bb
, itr
.phi ()))
1583 fprintf (dump_file
, "Applying if-conversion\n");
/* NOTE(review): extraction-damaged fragment — braces, early `return false'
   lines and the final return of RES are missing; code left byte-identical,
   only comments added.  */
1588 /* Return true when LOOP is if-convertible.
1589 LOOP is if-convertible if:
1591 - it has two or more basic blocks,
1592 - it has only one exit,
1593 - loop header is not the exit edge,
1594 - if its basic blocks and phi nodes are if convertible. */
1597 if_convertible_loop_p (class loop
*loop
, vec
<data_reference_p
> *refs
)
1603 /* Handle only innermost loop. */
1604 if (!loop
|| loop
->inner
)
1606 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1607 fprintf (dump_file
, "not innermost loop\n");
1611 /* If only one block, no need for if-conversion. */
1612 if (loop
->num_nodes
<= 2)
1614 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1615 fprintf (dump_file
, "less than 2 basic blocks\n");
1619 /* If one of the loop header's edge is an exit edge then do not
1620 apply if-conversion. */
1621 FOR_EACH_EDGE (e
, ei
, loop
->header
->succs
)
1622 if (loop_exit_edge_p (loop
, e
))
/* Delegate the per-block/per-stmt/per-phi work, then release the DR maps
   allocated by the helper regardless of the outcome.  */
1625 res
= if_convertible_loop_p_1 (loop
, refs
);
1627 delete innermost_DR_map
;
1628 innermost_DR_map
= NULL
;
1630 delete baseref_DR_map
;
1631 baseref_DR_map
= NULL
;
1636 /* Return reduc_1 if has_nop.
1639 tmp1 = (unsigned type) reduc_1;
1641 reduc_3 = (signed type) tmp2. */
1643 strip_nop_cond_scalar_reduction (bool has_nop
, tree op
)
1648 if (TREE_CODE (op
) != SSA_NAME
)
1651 gassign
*stmt
= safe_dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (op
));
1653 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt
))
1654 || !tree_nop_conversion_p (TREE_TYPE (op
), TREE_TYPE
1655 (gimple_assign_rhs1 (stmt
))))
1658 return gimple_assign_rhs1 (stmt
);
/* NOTE(review): extraction-damaged fragment — braces, `return false/result'
   lines, some declarations and the final assignments are missing; code left
   byte-identical, only comments added.  */
1661 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1662 which is in predicated basic block.
1663 In fact, the following PHI pattern is searching:
1665 reduc_1 = PHI <..., reduc_2>
1669 reduc_2 = PHI <reduc_1, reduc_3>
1671 ARG_0 and ARG_1 are correspondent PHI arguments.
1672 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1673 EXTENDED is true if PHI has > 2 arguments. */
1676 is_cond_scalar_reduction (gimple
*phi
, gimple
**reduc
, tree arg_0
, tree arg_1
,
1677 tree
*op0
, tree
*op1
, bool extended
, bool* has_nop
,
1680 tree lhs
, r_op1
, r_op2
, r_nop1
, r_nop2
;
1682 gimple
*header_phi
= NULL
;
1683 enum tree_code reduction_op
;
1684 basic_block bb
= gimple_bb (phi
);
1685 class loop
*loop
= bb
->loop_father
;
1686 edge latch_e
= loop_latch_edge (loop
);
1687 imm_use_iterator imm_iter
;
1688 use_operand_p use_p
;
1691 bool result
= *has_nop
= false;
1692 if (TREE_CODE (arg_0
) != SSA_NAME
|| TREE_CODE (arg_1
) != SSA_NAME
)
/* Identify which argument is the loop-header PHI and which is the
   candidate reduction statement.  */
1695 if (!extended
&& gimple_code (SSA_NAME_DEF_STMT (arg_0
)) == GIMPLE_PHI
)
1698 header_phi
= SSA_NAME_DEF_STMT (arg_0
);
1699 stmt
= SSA_NAME_DEF_STMT (arg_1
);
1701 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1
)) == GIMPLE_PHI
)
1704 header_phi
= SSA_NAME_DEF_STMT (arg_1
);
1705 stmt
= SSA_NAME_DEF_STMT (arg_0
);
1709 if (gimple_bb (header_phi
) != loop
->header
)
1712 if (PHI_ARG_DEF_FROM_EDGE (header_phi
, latch_e
) != PHI_RESULT (phi
))
1715 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1716 || gimple_has_volatile_ops (stmt
))
1719 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
1722 if (!is_predicated (gimple_bb (stmt
)))
1725 /* Check that stmt-block is predecessor of phi-block. */
1726 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->succs
)
1735 if (!has_single_use (lhs
))
1738 reduction_op
= gimple_assign_rhs_code (stmt
);
1740 /* Catch something like below
1743 reduc_1 = PHI <..., reduc_2>
1746 tmp1 = (unsigned type) reduc_1;
1748 reduc_3 = (signed type) tmp2;
1750 reduc_2 = PHI <reduc_1, reduc_3>
1754 reduc_2 = PHI <0, reduc_1>
1755 tmp1 = (unsigned type)reduc_1;
1756 ifcvt = cond_expr ? rhs2 : 0
1757 tmp2 = tmp1 +/- ifcvt;
1758 reduc_1 = (signed type)tmp2; */
/* Look through a wrapping no-op conversion around the reduction.  */
1760 if (CONVERT_EXPR_CODE_P (reduction_op
))
1762 lhs
= gimple_assign_rhs1 (stmt
);
1763 if (TREE_CODE (lhs
) != SSA_NAME
1764 || !has_single_use (lhs
))
1768 stmt
= SSA_NAME_DEF_STMT (lhs
);
1769 if (gimple_bb (stmt
) != gimple_bb (*nop_reduc
)
1770 || !is_gimple_assign (stmt
))
1774 reduction_op
= gimple_assign_rhs_code (stmt
);
/* Only these commutative-ish arithmetic/logical codes are supported.  */
1777 if (reduction_op
!= PLUS_EXPR
1778 && reduction_op
!= MINUS_EXPR
1779 && reduction_op
!= MULT_EXPR
1780 && reduction_op
!= BIT_IOR_EXPR
1781 && reduction_op
!= BIT_XOR_EXPR
1782 && reduction_op
!= BIT_AND_EXPR
)
1784 r_op1
= gimple_assign_rhs1 (stmt
);
1785 r_op2
= gimple_assign_rhs2 (stmt
);
1787 r_nop1
= strip_nop_cond_scalar_reduction (*has_nop
, r_op1
);
1788 r_nop2
= strip_nop_cond_scalar_reduction (*has_nop
, r_op2
);
1790 /* Make R_OP1 to hold reduction variable. */
1791 if (r_nop2
== PHI_RESULT (header_phi
)
1792 && commutative_tree_code (reduction_op
))
1794 std::swap (r_op1
, r_op2
);
1795 std::swap (r_nop1
, r_nop2
);
1797 else if (r_nop1
!= PHI_RESULT (header_phi
))
1802 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1803 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, r_nop1
)
1805 gimple
*use_stmt
= USE_STMT (use_p
);
1806 if (is_gimple_debug (use_stmt
))
1808 if (use_stmt
== SSA_NAME_DEF_STMT (r_op1
))
1810 if (use_stmt
!= phi
)
1815 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1816 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, r_op1
)
1818 gimple
*use_stmt
= USE_STMT (use_p
);
1819 if (is_gimple_debug (use_stmt
))
1821 if (use_stmt
== stmt
)
1823 if (gimple_code (use_stmt
) != GIMPLE_PHI
)
/* Success: hand the operands back to the caller.  */
1827 *op0
= r_op1
; *op1
= r_op2
;
/* NOTE(review): extraction-damaged fragment — braces, some call arguments,
   the `else' arm bracket lines and the final `return rhs' are missing;
   code left byte-identical, only comments added.  */
1832 /* Converts conditional scalar reduction into unconditional form, e.g.
1834 if (_5 != 0) goto bb_5 else goto bb_6
1840 # res_2 = PHI <res_13(4), res_6(5)>
1843 will be converted into sequence
1844 _ifc__1 = _5 != 0 ? 1 : 0;
1845 res_2 = res_13 + _ifc__1;
1846 Argument SWAP tells that arguments of conditional expression should be
1848 Returns rhs of resulting PHI assignment. */
1851 convert_scalar_cond_reduction (gimple
*reduc
, gimple_stmt_iterator
*gsi
,
1852 tree cond
, tree op0
, tree op1
, bool swap
,
1853 bool has_nop
, gimple
* nop_reduc
)
1855 gimple_stmt_iterator stmt_it
;
1858 tree rhs1
= gimple_assign_rhs1 (reduc
);
1859 tree lhs
= gimple_assign_lhs (reduc
);
1860 tree tmp
= make_temp_ssa_name (TREE_TYPE (rhs1
), NULL
, "_ifc_");
1862 enum tree_code reduction_op
= gimple_assign_rhs_code (reduc
);
1863 tree op_nochange
= neutral_op_for_reduction (TREE_TYPE (rhs1
), reduction_op
,
1865 gimple_seq stmts
= NULL
;
1867 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1869 fprintf (dump_file
, "Found cond scalar reduction.\n");
1870 print_gimple_stmt (dump_file
, reduc
, 0, TDF_SLIM
);
1873 /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
1874 The COND_OP will have a neutral_op else value. */
1876 ifn
= get_conditional_internal_fn (reduction_op
);
1878 && vectorized_internal_fn_supported_p (ifn
, TREE_TYPE (lhs
))
1881 gcall
*cond_call
= gimple_build_call_internal (ifn
, 4,
1882 unshare_expr (cond
),
1884 gsi_insert_before (gsi
, cond_call
, GSI_SAME_STMT
);
1885 gimple_call_set_lhs (cond_call
, tmp
);
/* Fallback path (else arm): emit COND_EXPR + the reduction op.  */
1890 /* Build cond expression using COND and constant operand
1891 of reduction rhs. */
1892 c
= fold_build_cond_expr (TREE_TYPE (rhs1
),
1893 unshare_expr (cond
),
1894 swap
? op_nochange
: op1
,
1895 swap
? op1
: op_nochange
);
1896 /* Create assignment stmt and insert it at GSI. */
1897 new_assign
= gimple_build_assign (tmp
, c
);
1898 gsi_insert_before (gsi
, new_assign
, GSI_SAME_STMT
);
1899 /* Build rhs for unconditional increment/decrement/logic_operation. */
1900 rhs
= gimple_build (&stmts
, reduction_op
,
1901 TREE_TYPE (rhs1
), op0
, tmp
);
/* If the reduction was wrapped in a no-op conversion, re-apply it and
   delete the now-dead conversion statement.  */
1906 rhs
= gimple_convert (&stmts
,
1907 TREE_TYPE (gimple_assign_lhs (nop_reduc
)), rhs
);
1908 stmt_it
= gsi_for_stmt (nop_reduc
);
1909 gsi_remove (&stmt_it
, true);
1910 release_defs (nop_reduc
);
1912 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
1914 /* Delete original reduction stmt. */
1915 stmt_it
= gsi_for_stmt (reduc
);
1916 gsi_remove (&stmt_it
, true);
1917 release_defs (reduc
);
1921 /* Generate a simplified conditional. */
1924 gen_simplified_condition (tree cond
, scalar_cond_masked_set_type
&cond_set
)
1926 /* Check if the value is already live in a previous branch. This resolves
1927 nested conditionals from diamond PHI reductions. */
1928 if (TREE_CODE (cond
) == SSA_NAME
)
1930 gimple
*stmt
= SSA_NAME_DEF_STMT (cond
);
1931 gassign
*assign
= NULL
;
1932 if ((assign
= as_a
<gassign
*> (stmt
))
1933 && gimple_assign_rhs_code (assign
) == BIT_AND_EXPR
)
1935 tree arg1
= gimple_assign_rhs1 (assign
);
1936 tree arg2
= gimple_assign_rhs2 (assign
);
1937 if (cond_set
.contains ({ arg1
, 1 }))
1938 arg1
= boolean_true_node
;
1940 arg1
= gen_simplified_condition (arg1
, cond_set
);
1942 if (cond_set
.contains ({ arg2
, 1 }))
1943 arg2
= boolean_true_node
;
1945 arg2
= gen_simplified_condition (arg2
, cond_set
);
1947 cond
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
, arg1
, arg2
);
1953 /* Structure used to track meta-data on PHI arguments used to generate
1954 most efficient comparison sequence to slatten a PHI node. */
1956 typedef struct ifcvt_arg_entry
1958 /* The PHI node argument value. */
1961 /* The number of compares required to reach this PHI node from start of the
1962 BB being if-converted. */
1963 unsigned num_compares
;
1965 /* The number of times this PHI node argument appears in the current PHI
1969 /* The indices at which this PHI arg occurs inside the PHI node. */
1971 } ifcvt_arg_entry_t
;
/* NOTE(review): extraction-damaged fragment — braces, loop-body breaks,
   `*invert' assignments and the final `return cond' are missing; code left
   byte-identical, only comments added.  */
1973 /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
1974 as to whether the condition is inverted. */
1977 gen_phi_arg_condition (gphi
*phi
, ifcvt_arg_entry_t
&arg
,
1978 gimple_stmt_iterator
*gsi
,
1979 scalar_cond_masked_set_type
&cond_set
, bool *invert
)
1983 tree cond
= NULL_TREE
;
1988 len
= arg
.indexes
->length ();
1989 gcc_assert (len
> 0);
/* OR together the predicates of the source blocks of every edge through
   which ARG reaches the PHI.  */
1990 for (i
= 0; i
< len
; i
++)
1992 e
= gimple_phi_arg_edge (phi
, (*arg
.indexes
)[i
]);
1993 c
= bb_predicate (e
->src
);
1994 if (is_true_predicate (c
))
1999 /* If we have just a single inverted predicate, signal that and
2000 instead invert the COND_EXPR arms. */
2001 if (len
== 1 && TREE_CODE (c
) == TRUTH_NOT_EXPR
)
2003 c
= TREE_OPERAND (c
, 0);
2007 c
= gen_simplified_condition (c
, cond_set
);
2008 c
= force_gimple_operand_gsi (gsi
, unshare_expr (c
),
2009 true, NULL_TREE
, true, GSI_SAME_STMT
);
2010 if (cond
!= NULL_TREE
)
2012 /* Must build OR expression. */
2013 cond
= fold_or_predicates (EXPR_LOCATION (c
), c
, cond
);
2014 cond
= force_gimple_operand_gsi (gsi
, unshare_expr (cond
), true,
2015 NULL_TREE
, true, GSI_SAME_STMT
);
2020 /* Register the new possibly simplified conditional. When more than 2
2021 entries in a phi node we chain entries in the false branch, so the
2022 inverted condition is active. */
2023 scalar_cond_masked_key
pred_cond ({ cond
, 1 });
2025 pred_cond
.inverted_p
= !pred_cond
.inverted_p
;
2026 cond_set
.add (pred_cond
);
2028 gcc_assert (cond
!= NULL_TREE
);
/* NOTE(review): extraction-damaged fragment — braces, the invert branch
   condition and the trailing `return lhs' are missing; code left
   byte-identical, only comments added.  */
2032 /* Create the smallest nested conditional possible. On pre-order we record
2033 which conditionals are live, and on post-order rewrite the chain by removing
2034 already active conditions.
2036 As an example we simplify:
2040 _22 = a_10 < e_11(D);
2042 _ifc__42 = _23 ? t_13 : 0;
2043 t_6 = _7 ? 1 : _ifc__42
2048 _22 = a_10 < e_11(D);
2049 _ifc__42 = _22 ? t_13 : 0;
2050 t_6 = _7 ? 1 : _ifc__42;
2052 which produces better code. */
2055 gen_phi_nest_statement (gphi
*phi
, gimple_stmt_iterator
*gsi
,
2056 scalar_cond_masked_set_type
&cond_set
, tree type
,
2057 gimple
**res_stmt
, tree lhs0
,
2058 vec
<struct ifcvt_arg_entry
> &args
, unsigned idx
)
/* Recursion base: past the last argument, the innermost value is the
   final argument itself.  */
2060 if (idx
== args
.length ())
2061 return args
[idx
- 1].arg
;
2064 tree cond
= gen_phi_arg_condition (phi
, args
[idx
- 1], gsi
, cond_set
,
/* Recurse first (post-order) so inner COND_EXPRs are emitted before the
   outer one that consumes them.  */
2066 tree arg1
= gen_phi_nest_statement (phi
, gsi
, cond_set
, type
, res_stmt
, lhs0
,
2069 unsigned prev
= idx
;
2070 unsigned curr
= prev
- 1;
2071 tree arg0
= args
[curr
].arg
;
2074 lhs
= make_temp_ssa_name (type
, NULL
, "_ifc_");
/* Two builds: one for the inverted-condition case, one for the normal
   case (the selecting `if' line is missing from this fragment).  */
2079 rhs
= fold_build_cond_expr (type
, unshare_expr (cond
),
2082 rhs
= fold_build_cond_expr (type
, unshare_expr (cond
),
2084 gassign
*new_stmt
= gimple_build_assign (lhs
, rhs
);
2085 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
2086 update_stmt (new_stmt
);
2087 *res_stmt
= new_stmt
;
2091 /* When flattening a PHI node we have a choice of which conditions to test to
2092 for all the paths from the start of the dominator block of the BB with the
2093 PHI node. If the PHI node has X arguments we have to only test X - 1
2094 conditions as the last one is implicit. It does matter which conditions we
2095 test first. We should test the shortest condition first (distance here is
2096 measures in the number of logical operators in the condition) and the
2097 longest one last. This allows us to skip testing the most expensive
2098 condition. To accomplish this we need to sort the conditions. P1 and P2
2099 are sorted first based on the number of logical operations (num_compares)
2100 and then by how often they occur in the PHI node. */
2103 cmp_arg_entry (const void *p1
, const void *p2
, void * /* data. */)
2105 const ifcvt_arg_entry sval1
= *(const ifcvt_arg_entry
*)p1
;
2106 const ifcvt_arg_entry sval2
= *(const ifcvt_arg_entry
*)p2
;
2108 if (sval1
.num_compares
< sval2
.num_compares
)
2110 else if (sval1
.num_compares
> sval2
.num_compares
)
2113 if (sval1
.occurs
< sval2
.occurs
)
2115 else if (sval1
.occurs
> sval2
.occurs
)
/* NOTE(review): extraction-damaged fragment — braces, `return' lines,
   several declarations, else arms and some call-argument tails are
   missing; code left byte-identical, only comments added.  */
2121 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2122 This routine can handle PHI nodes with more than two arguments.
2125 S1: A = PHI <x1(1), x2(5)>
2127 S2: A = cond ? x1 : x2;
2129 The generated code is inserted at GSI that points to the top of
2130 basic block's statement list.
2131 If PHI node has more than two arguments a chain of conditional
2132 expression is produced. */
2136 predicate_scalar_phi (gphi
*phi
, gimple_stmt_iterator
*gsi
)
2138 gimple
*new_stmt
= NULL
, *reduc
, *nop_reduc
;
2139 tree rhs
, res
, arg0
, arg1
, op0
, op1
, scev
;
2141 unsigned int index0
;
2147 res
= gimple_phi_result (phi
);
2148 if (virtual_operand_p (res
))
/* Degenerate PHIs (all args equal, or a computable scalar evolution)
   collapse to a plain assignment.  */
2151 if ((rhs
= degenerate_phi_result (phi
))
2152 || ((scev
= analyze_scalar_evolution (gimple_bb (phi
)->loop_father
,
2154 && !chrec_contains_undetermined (scev
)
2156 && (rhs
= gimple_phi_arg_def (phi
, 0))))
2158 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2160 fprintf (dump_file
, "Degenerate phi!\n");
2161 print_gimple_stmt (dump_file
, phi
, 0, TDF_SLIM
);
2163 new_stmt
= gimple_build_assign (res
, rhs
);
2164 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
2165 update_stmt (new_stmt
);
2169 bb
= gimple_bb (phi
);
2170 /* Keep track of conditionals already seen. */
2171 scalar_cond_masked_set_type cond_set
;
2172 if (EDGE_COUNT (bb
->preds
) == 2)
2174 /* Predicate ordinary PHI node with 2 arguments. */
2175 edge first_edge
, second_edge
;
2176 basic_block true_bb
;
2177 first_edge
= EDGE_PRED (bb
, 0);
2178 second_edge
= EDGE_PRED (bb
, 1);
2179 cond
= bb_predicate (first_edge
->src
);
2180 cond_set
.add ({ cond
, 1 });
/* Canonicalize so FIRST_EDGE carries the non-negated predicate.  */
2181 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
2182 std::swap (first_edge
, second_edge
);
2183 if (EDGE_COUNT (first_edge
->src
->succs
) > 1)
2185 cond
= bb_predicate (second_edge
->src
);
2186 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
2187 cond
= TREE_OPERAND (cond
, 0);
2189 first_edge
= second_edge
;
2192 cond
= bb_predicate (first_edge
->src
);
2194 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2195 cond
= gen_simplified_condition (cond
, cond_set
);
2196 cond
= force_gimple_operand_gsi (gsi
, unshare_expr (cond
), true,
2197 NULL_TREE
, true, GSI_SAME_STMT
);
2198 true_bb
= first_edge
->src
;
/* Order ARG0/ARG1 so ARG0 corresponds to the true edge.  */
2199 if (EDGE_PRED (bb
, 1)->src
== true_bb
)
2201 arg0
= gimple_phi_arg_def (phi
, 1);
2202 arg1
= gimple_phi_arg_def (phi
, 0);
2206 arg0
= gimple_phi_arg_def (phi
, 0);
2207 arg1
= gimple_phi_arg_def (phi
, 1);
2209 if (is_cond_scalar_reduction (phi
, &reduc
, arg0
, arg1
,
2210 &op0
, &op1
, false, &has_nop
,
2213 /* Convert reduction stmt into vectorizable form. */
2214 rhs
= convert_scalar_cond_reduction (reduc
, gsi
, cond
, op0
, op1
,
2215 true_bb
!= gimple_bb (reduc
),
2216 has_nop
, nop_reduc
);
2217 redundant_ssa_names
.safe_push (std::make_pair (res
, rhs
));
2220 /* Build new RHS using selected condition and arguments. */
2221 rhs
= fold_build_cond_expr (TREE_TYPE (res
), unshare_expr (cond
),
2223 new_stmt
= gimple_build_assign (res
, rhs
);
2224 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
2225 gimple_stmt_iterator new_gsi
= gsi_for_stmt (new_stmt
);
2226 if (fold_stmt (&new_gsi
, follow_all_ssa_edges
))
2228 new_stmt
= gsi_stmt (new_gsi
);
2229 update_stmt (new_stmt
);
2232 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2234 fprintf (dump_file
, "new phi replacement stmt\n");
2235 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
/* More-than-two-predecessor case starts here (else arm, brace lines
   missing).  */
2240 /* Create hashmap for PHI node which contain vector of argument indexes
2241 having the same value. */
2243 hash_map
<tree_operand_hash
, auto_vec
<int> > phi_arg_map
;
2244 unsigned int num_args
= gimple_phi_num_args (phi
);
2245 /* Vector of different PHI argument values. */
2246 auto_vec
<ifcvt_arg_entry_t
> args
;
2248 /* Compute phi_arg_map, determine the list of unique PHI args and the indices
2249 where they are in the PHI node. The indices will be used to determine
2250 the conditions to apply and their complexity. */
2251 for (i
= 0; i
< num_args
; i
++)
2255 arg
= gimple_phi_arg_def (phi
, i
);
2256 if (!phi_arg_map
.get (arg
))
2257 args
.safe_push ({ arg
, 0, 0, NULL
});
2258 phi_arg_map
.get_or_insert (arg
).safe_push (i
);
2261 /* Determine element with max number of occurrences and complexity. Looking
2262 at only number of occurrences as a measure for complexity isn't enough as
2263 all usages can be unique but the comparisons to reach the PHI node differ
2265 for (unsigned i
= 0; i
< args
.length (); i
++)
2267 unsigned int len
= 0;
2268 vec
<int> *indices
= phi_arg_map
.get (args
[i
].arg
);
2269 for (int index
: *indices
)
2271 edge e
= gimple_phi_arg_edge (phi
, index
);
2272 len
+= get_bb_num_predicate_stmts (e
->src
);
2275 unsigned occur
= indices
->length ();
2276 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2277 fprintf (dump_file
, "Ranking %d as len=%d, idx=%d\n", i
, len
, occur
);
2278 args
[i
].num_compares
= len
;
2279 args
[i
].occurs
= occur
;
2280 args
[i
].indexes
= indices
;
2283 /* Sort elements based on rankings ARGS. */
2284 args
.stablesort (cmp_arg_entry
, NULL
);
2286 /* Handle one special case when number of arguments with different values
2287 is equal 2 and one argument has the only occurrence. Such PHI can be
2288 handled as if would have only 2 arguments. */
2289 if (args
.length () == 2
2290 && args
[0].indexes
->length () == 1)
2292 index0
= (*args
[0].indexes
)[0];
2295 e
= gimple_phi_arg_edge (phi
, index0
);
2296 cond
= bb_predicate (e
->src
);
2297 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
2300 cond
= TREE_OPERAND (cond
, 0);
2302 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2303 cond
= force_gimple_operand_gsi (gsi
, unshare_expr (cond
), true,
2304 NULL_TREE
, true, GSI_SAME_STMT
);
2305 if (!(is_cond_scalar_reduction (phi
, &reduc
, arg0
, arg1
,
2306 &op0
, &op1
, true, &has_nop
, &nop_reduc
)))
2307 rhs
= fold_build_cond_expr (TREE_TYPE (res
), unshare_expr (cond
),
2309 swap
? arg0
: arg1
);
2312 /* Convert reduction stmt into vectorizable form. */
2313 rhs
= convert_scalar_cond_reduction (reduc
, gsi
, cond
, op0
, op1
,
2314 swap
, has_nop
, nop_reduc
);
2315 redundant_ssa_names
.safe_push (std::make_pair (res
, rhs
));
2317 new_stmt
= gimple_build_assign (res
, rhs
);
2318 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
2319 update_stmt (new_stmt
);
/* General case: build a chain of nested COND_EXPRs (else arm, brace
   lines missing).  */
2324 tree type
= TREE_TYPE (gimple_phi_result (phi
));
2325 gen_phi_nest_statement (phi
, gsi
, cond_set
, type
, &new_stmt
, res
,
2329 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2331 fprintf (dump_file
, "new extended phi replacement stmt\n");
2332 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
/* NOTE(review): extraction-damaged fragment — braces, `continue' lines and
   some declarations are missing; code left byte-identical, only comments
   added.  */
2336 /* Replaces in LOOP all the scalar phi nodes other than those in the
2337 LOOP->header block with conditional modify expressions. */
2340 predicate_all_scalar_phis (class loop
*loop
)
2343 unsigned int orig_loop_num_nodes
= loop
->num_nodes
;
/* Start at 1: block 0 is presumably the header, which keeps its PHIs.  */
2346 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2349 gimple_stmt_iterator gsi
;
2350 gphi_iterator phi_gsi
;
2353 if (bb
== loop
->header
)
2356 phi_gsi
= gsi_start_phis (bb
);
2357 if (gsi_end_p (phi_gsi
))
2360 gsi
= gsi_after_labels (bb
);
2361 while (!gsi_end_p (phi_gsi
))
2363 phi
= phi_gsi
.phi ();
/* Virtual PHIs are skipped; scalar PHIs are rewritten to COND_EXPR
   assignments and then removed.  */
2364 if (virtual_operand_p (gimple_phi_result (phi
)))
2365 gsi_next (&phi_gsi
);
2368 predicate_scalar_phi (phi
, &gsi
);
2369 remove_phi_node (&phi_gsi
, false);
/* NOTE(review): extraction-damaged fragment — braces, `continue' lines,
   the `if (stmts)' guard and the gsi_end_p condition line are missing;
   code left byte-identical, only comments added.  */
2375 /* Insert in each basic block of LOOP the statements produced by the
2376 gimplification of the predicates. */
2379 insert_gimplified_predicates (loop_p loop
)
2383 for (i
= 0; i
< loop
->num_nodes
; i
++)
2385 basic_block bb
= ifc_bbs
[i
];
/* Unpredicated blocks must not have pending gimplified predicate
   statements.  */
2387 if (!is_predicated (bb
))
2388 gcc_assert (bb_predicate_gimplified_stmts (bb
) == NULL
);
2389 if (!is_predicated (bb
))
2391 /* Do not insert statements for a basic block that is not
2392 predicated. Also make sure that the predicate of the
2393 basic block is set to true. */
2394 reset_bb_predicate (bb
);
2398 stmts
= bb_predicate_gimplified_stmts (bb
);
2401 if (need_to_predicate
)
2403 /* Insert the predicate of the BB just after the label,
2404 as the if-conversion of memory writes will use this
2406 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
2407 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
/* Else arm (brace lines missing): place late to reduce register
   pressure.  */
2411 /* Insert the predicate of the BB at the end of the BB
2412 as this would reduce the register pressure: the only
2413 use of this predicate will be in successor BBs. */
2414 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2417 || stmt_ends_bb_p (gsi_stmt (gsi
)))
2418 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2420 gsi_insert_seq_after (&gsi
, stmts
, GSI_SAME_STMT
);
2423 /* Once the sequence is code generated, set it to NULL. */
2424 set_bb_predicate_gimplified_stmts (bb
, NULL
, true);
2429 /* Helper function for predicate_statements. Returns index of existent
2430 mask if it was created for given SIZE and -1 otherwise. */
2433 mask_exists (int size
, const vec
<int> &vec
)
2437 FOR_EACH_VEC_ELT (vec
, ix
, v
)
2443 /* Helper function for predicate_statements. STMT is a memory read or
2444 write and it needs to be predicated by MASK. Return a statement
2448 predicate_load_or_store (gimple_stmt_iterator
*gsi
, gassign
*stmt
, tree mask
)
2452 tree lhs
= gimple_assign_lhs (stmt
);
2453 tree rhs
= gimple_assign_rhs1 (stmt
);
2454 tree ref
= TREE_CODE (lhs
) == SSA_NAME
? rhs
: lhs
;
2455 mark_addressable (ref
);
2456 tree addr
= force_gimple_operand_gsi (gsi
, build_fold_addr_expr (ref
),
2457 true, NULL_TREE
, true, GSI_SAME_STMT
);
2458 tree ptr
= build_int_cst (reference_alias_ptr_type (ref
),
2459 get_object_alignment (ref
));
2460 /* Copy points-to info if possible. */
2461 if (TREE_CODE (addr
) == SSA_NAME
&& !SSA_NAME_PTR_INFO (addr
))
2462 copy_ref_info (build2 (MEM_REF
, TREE_TYPE (ref
), addr
, ptr
),
2464 if (TREE_CODE (lhs
) == SSA_NAME
)
2467 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, addr
,
2469 gimple_call_set_lhs (new_stmt
, lhs
);
2470 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
2475 = gimple_build_call_internal (IFN_MASK_STORE
, 4, addr
, ptr
,
2477 gimple_move_vops (new_stmt
, stmt
);
2479 gimple_call_set_nothrow (new_stmt
, true);
2483 /* STMT uses OP_LHS. Check whether it is equivalent to:
2485 ... = OP_MASK ? OP_LHS : X;
2487 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2488 known to have value OP_COND. */
2491 check_redundant_cond_expr (gimple
*stmt
, tree op_mask
, tree op_cond
,
2494 gassign
*assign
= dyn_cast
<gassign
*> (stmt
);
2495 if (!assign
|| gimple_assign_rhs_code (assign
) != COND_EXPR
)
2498 tree use_cond
= gimple_assign_rhs1 (assign
);
2499 tree if_true
= gimple_assign_rhs2 (assign
);
2500 tree if_false
= gimple_assign_rhs3 (assign
);
2502 if ((use_cond
== op_mask
|| operand_equal_p (use_cond
, op_cond
, 0))
2503 && if_true
== op_lhs
)
2506 if (inverse_conditions_p (use_cond
, op_cond
) && if_false
== op_lhs
)
2512 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2513 the set of SSA names defined earlier in STMT's block. */
2516 value_available_p (gimple
*stmt
, hash_set
<tree_ssa_name_hash
> *ssa_names
,
2519 if (is_gimple_min_invariant (value
))
2522 if (TREE_CODE (value
) == SSA_NAME
)
2524 if (SSA_NAME_IS_DEFAULT_DEF (value
))
2527 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (value
));
2528 basic_block use_bb
= gimple_bb (stmt
);
2529 return (def_bb
== use_bb
2530 ? ssa_names
->contains (value
)
2531 : dominated_by_p (CDI_DOMINATORS
, use_bb
, def_bb
));
2537 /* Helper function for predicate_statements. STMT is a potentially-trapping
2538 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2539 has value COND. Return a statement that does so. SSA_NAMES is the set of
2540 SSA names defined earlier in STMT's block. */
2543 predicate_rhs_code (gassign
*stmt
, tree mask
, tree cond
,
2544 hash_set
<tree_ssa_name_hash
> *ssa_names
)
2546 tree lhs
= gimple_assign_lhs (stmt
);
2547 tree_code code
= gimple_assign_rhs_code (stmt
);
2548 unsigned int nops
= gimple_num_ops (stmt
);
2549 internal_fn cond_fn
= get_conditional_internal_fn (code
);
2551 /* Construct the arguments to the conditional internal function. */
2552 auto_vec
<tree
, 8> args
;
2553 args
.safe_grow (nops
+ 1, true);
2555 for (unsigned int i
= 1; i
< nops
; ++i
)
2556 args
[i
] = gimple_op (stmt
, i
);
2557 args
[nops
] = NULL_TREE
;
2559 /* Look for uses of the result to see whether they are COND_EXPRs that can
2560 be folded into the conditional call. */
2561 imm_use_iterator imm_iter
;
2563 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, lhs
)
2565 tree new_else
= check_redundant_cond_expr (use_stmt
, mask
, cond
, lhs
);
2566 if (new_else
&& value_available_p (stmt
, ssa_names
, new_else
))
2569 args
[nops
] = new_else
;
2570 if (operand_equal_p (new_else
, args
[nops
], 0))
2574 LHS = IFN_COND (MASK, ..., ELSE);
2575 X = MASK ? LHS : ELSE;
2577 which makes X equivalent to LHS. */
2578 tree use_lhs
= gimple_assign_lhs (use_stmt
);
2579 redundant_ssa_names
.safe_push (std::make_pair (use_lhs
, lhs
));
2584 args
[nops
] = targetm
.preferred_else_value (cond_fn
, TREE_TYPE (lhs
),
2585 nops
- 1, &args
[1]);
2587 /* Create and insert the call. */
2588 gcall
*new_stmt
= gimple_build_call_internal_vec (cond_fn
, args
);
2589 gimple_call_set_lhs (new_stmt
, lhs
);
2590 gimple_call_set_nothrow (new_stmt
, true);
2595 /* Predicate each write to memory in LOOP.
2597 This function transforms control flow constructs containing memory
2600 | for (i = 0; i < N; i++)
2604 into the following form that does not contain control flow:
2606 | for (i = 0; i < N; i++)
2607 | A[i] = cond ? expr : A[i];
2609 The original CFG looks like this:
2616 | if (i < N) goto bb_5 else goto bb_2
2620 | cond = some_computation;
2621 | if (cond) goto bb_3 else goto bb_4
2633 insert_gimplified_predicates inserts the computation of the COND
2634 expression at the beginning of the destination basic block:
2641 | if (i < N) goto bb_5 else goto bb_2
2645 | cond = some_computation;
2646 | if (cond) goto bb_3 else goto bb_4
2650 | cond = some_computation;
2659 predicate_statements is then predicating the memory write as follows:
2666 | if (i < N) goto bb_5 else goto bb_2
2670 | if (cond) goto bb_3 else goto bb_4
2674 | cond = some_computation;
2675 | A[i] = cond ? expr : A[i];
2683 and finally combine_blocks removes the basic block boundaries making
2684 the loop vectorizable:
2688 | if (i < N) goto bb_5 else goto bb_1
2692 | cond = some_computation;
2693 | A[i] = cond ? expr : A[i];
2694 | if (i < N) goto bb_5 else goto bb_4
2703 predicate_statements (loop_p loop
)
2705 unsigned int i
, orig_loop_num_nodes
= loop
->num_nodes
;
2706 auto_vec
<int, 1> vect_sizes
;
2707 auto_vec
<tree
, 1> vect_masks
;
2708 hash_set
<tree_ssa_name_hash
> ssa_names
;
2710 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2712 gimple_stmt_iterator gsi
;
2713 basic_block bb
= ifc_bbs
[i
];
2714 tree cond
= bb_predicate (bb
);
2718 if (is_true_predicate (cond
))
2722 if (TREE_CODE (cond
) == TRUTH_NOT_EXPR
)
2725 cond
= TREE_OPERAND (cond
, 0);
2728 vect_sizes
.truncate (0);
2729 vect_masks
.truncate (0);
2731 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
2733 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
2737 else if (is_false_predicate (cond
)
2738 && gimple_vdef (stmt
))
2740 unlink_stmt_vdef (stmt
);
2741 gsi_remove (&gsi
, true);
2742 release_defs (stmt
);
2745 else if (gimple_plf (stmt
, GF_PLF_2
)
2746 && is_gimple_assign (stmt
))
2748 tree lhs
= gimple_assign_lhs (stmt
);
2751 gimple_seq stmts
= NULL
;
2752 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
2753 /* We checked before setting GF_PLF_2 that an equivalent
2754 integer mode exists. */
2755 int bitsize
= GET_MODE_BITSIZE (mode
).to_constant ();
2756 if (!vect_sizes
.is_empty ()
2757 && (index
= mask_exists (bitsize
, vect_sizes
)) != -1)
2758 /* Use created mask. */
2759 mask
= vect_masks
[index
];
2762 if (COMPARISON_CLASS_P (cond
))
2763 mask
= gimple_build (&stmts
, TREE_CODE (cond
),
2765 TREE_OPERAND (cond
, 0),
2766 TREE_OPERAND (cond
, 1));
2773 = constant_boolean_node (true, TREE_TYPE (mask
));
2774 mask
= gimple_build (&stmts
, BIT_XOR_EXPR
,
2775 TREE_TYPE (mask
), mask
, true_val
);
2777 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
2779 /* Save mask and its size for further use. */
2780 vect_sizes
.safe_push (bitsize
);
2781 vect_masks
.safe_push (mask
);
2783 if (gimple_assign_single_p (stmt
))
2784 new_stmt
= predicate_load_or_store (&gsi
, stmt
, mask
);
2786 new_stmt
= predicate_rhs_code (stmt
, mask
, cond
, &ssa_names
);
2788 gsi_replace (&gsi
, new_stmt
, true);
2790 else if (((lhs
= gimple_assign_lhs (stmt
)), true)
2791 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2792 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2793 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
))
2794 && arith_code_with_undefined_signed_overflow
2795 (gimple_assign_rhs_code (stmt
)))
2796 rewrite_to_defined_overflow (&gsi
);
2797 else if (gimple_vdef (stmt
))
2799 tree lhs
= gimple_assign_lhs (stmt
);
2800 tree rhs
= gimple_assign_rhs1 (stmt
);
2801 tree type
= TREE_TYPE (lhs
);
2803 lhs
= ifc_temp_var (type
, unshare_expr (lhs
), &gsi
);
2804 rhs
= ifc_temp_var (type
, unshare_expr (rhs
), &gsi
);
2806 std::swap (lhs
, rhs
);
2807 cond
= force_gimple_operand_gsi (&gsi
, unshare_expr (cond
), true,
2808 NULL_TREE
, true, GSI_SAME_STMT
);
2809 rhs
= fold_build_cond_expr (type
, unshare_expr (cond
), rhs
, lhs
);
2810 gimple_assign_set_rhs1 (stmt
, ifc_temp_var (type
, rhs
, &gsi
));
2814 if (gimple_plf (gsi_stmt (gsi
), GF_PLF_2
)
2815 && is_gimple_call (gsi_stmt (gsi
)))
2817 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2818 This will cause the vectorizer to match the "in branch"
2819 clone variants, and serves to build the mask vector
2820 in a natural way. */
2821 gcall
*call
= dyn_cast
<gcall
*> (gsi_stmt (gsi
));
2822 tree orig_fn
= gimple_call_fn (call
);
2823 int orig_nargs
= gimple_call_num_args (call
);
2824 auto_vec
<tree
> args
;
2825 args
.safe_push (orig_fn
);
2826 for (int i
= 0; i
< orig_nargs
; i
++)
2827 args
.safe_push (gimple_call_arg (call
, i
));
2828 args
.safe_push (cond
);
2830 /* Replace the call with a IFN_MASK_CALL that has the extra
2831 condition parameter. */
2832 gcall
*new_call
= gimple_build_call_internal_vec (IFN_MASK_CALL
,
2834 gimple_call_set_lhs (new_call
, gimple_call_lhs (call
));
2835 gsi_replace (&gsi
, new_call
, true);
2838 lhs
= gimple_get_lhs (gsi_stmt (gsi
));
2839 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
2840 ssa_names
.add (lhs
);
2847 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2848 other than the exit and latch of the LOOP. Also resets the
2849 GIMPLE_DEBUG information. */
2852 remove_conditions_and_labels (loop_p loop
)
2854 gimple_stmt_iterator gsi
;
2857 for (i
= 0; i
< loop
->num_nodes
; i
++)
2859 basic_block bb
= ifc_bbs
[i
];
2861 if (bb_with_exit_edge_p (loop
, bb
)
2862 || bb
== loop
->latch
)
2865 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); )
2866 switch (gimple_code (gsi_stmt (gsi
)))
2870 gsi_remove (&gsi
, true);
2874 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2875 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
2877 gimple_debug_bind_reset_value (gsi_stmt (gsi
));
2878 update_stmt (gsi_stmt (gsi
));
2889 /* Combine all the basic blocks from LOOP into one or two super basic
2890 blocks. Replace PHI nodes with conditional modify expressions. */
2893 combine_blocks (class loop
*loop
)
2895 basic_block bb
, exit_bb
, merge_target_bb
;
2896 unsigned int orig_loop_num_nodes
= loop
->num_nodes
;
2901 remove_conditions_and_labels (loop
);
2902 insert_gimplified_predicates (loop
);
2903 predicate_all_scalar_phis (loop
);
2905 if (need_to_predicate
|| need_to_rewrite_undefined
)
2906 predicate_statements (loop
);
2908 /* Merge basic blocks. */
2910 bool *predicated
= XNEWVEC (bool, orig_loop_num_nodes
);
2911 for (i
= 0; i
< orig_loop_num_nodes
; i
++)
2914 predicated
[i
] = !is_true_predicate (bb_predicate (bb
));
2915 free_bb_predicate (bb
);
2916 if (bb_with_exit_edge_p (loop
, bb
))
2918 gcc_assert (exit_bb
== NULL
);
2922 gcc_assert (exit_bb
!= loop
->latch
);
2924 merge_target_bb
= loop
->header
;
2926 /* Get at the virtual def valid for uses starting at the first block
2927 we merge into the header. Without a virtual PHI the loop has the
2928 same virtual use on all stmts. */
2929 gphi
*vphi
= get_virtual_phi (loop
->header
);
2930 tree last_vdef
= NULL_TREE
;
2933 last_vdef
= gimple_phi_result (vphi
);
2934 for (gimple_stmt_iterator gsi
= gsi_start_bb (loop
->header
);
2935 ! gsi_end_p (gsi
); gsi_next (&gsi
))
2936 if (gimple_vdef (gsi_stmt (gsi
)))
2937 last_vdef
= gimple_vdef (gsi_stmt (gsi
));
2939 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
2941 gimple_stmt_iterator gsi
;
2942 gimple_stmt_iterator last
;
2946 if (bb
== exit_bb
|| bb
== loop
->latch
)
2949 /* We release virtual PHIs late because we have to propagate them
2950 out using the current VUSE. The def might be the one used
2952 vphi
= get_virtual_phi (bb
);
2955 /* When there's just loads inside the loop a stray virtual
2956 PHI merging the uses can appear, update last_vdef from
2959 last_vdef
= gimple_phi_arg_def (vphi
, 0);
2960 imm_use_iterator iter
;
2961 use_operand_p use_p
;
2963 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, gimple_phi_result (vphi
))
2965 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2966 SET_USE (use_p
, last_vdef
);
2968 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi
)))
2969 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef
) = 1;
2970 gsi
= gsi_for_stmt (vphi
);
2971 remove_phi_node (&gsi
, true);
2974 /* Make stmts member of loop->header and clear range info from all stmts
2975 in BB which is now no longer executed conditional on a predicate we
2976 could have derived it from. */
2977 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2979 gimple
*stmt
= gsi_stmt (gsi
);
2980 gimple_set_bb (stmt
, merge_target_bb
);
2981 /* Update virtual operands. */
2984 use_operand_p use_p
= ssa_vuse_operand (stmt
);
2986 && USE_FROM_PTR (use_p
) != last_vdef
)
2987 SET_USE (use_p
, last_vdef
);
2988 if (gimple_vdef (stmt
))
2989 last_vdef
= gimple_vdef (stmt
);
2992 /* If this is the first load we arrive at update last_vdef
2993 so we handle stray PHIs correctly. */
2994 last_vdef
= gimple_vuse (stmt
);
2999 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
)
3000 reset_flow_sensitive_info (op
);
3004 /* Update stmt list. */
3005 last
= gsi_last_bb (merge_target_bb
);
3006 gsi_insert_seq_after_without_update (&last
, bb_seq (bb
), GSI_NEW_STMT
);
3007 set_bb_seq (bb
, NULL
);
3010 /* Fixup virtual operands in the exit block. */
3012 && exit_bb
!= loop
->header
)
3014 /* We release virtual PHIs late because we have to propagate them
3015 out using the current VUSE. The def might be the one used
3017 vphi
= get_virtual_phi (exit_bb
);
3020 /* When there's just loads inside the loop a stray virtual
3021 PHI merging the uses can appear, update last_vdef from
3024 last_vdef
= gimple_phi_arg_def (vphi
, 0);
3025 imm_use_iterator iter
;
3026 use_operand_p use_p
;
3028 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, gimple_phi_result (vphi
))
3030 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3031 SET_USE (use_p
, last_vdef
);
3033 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi
)))
3034 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef
) = 1;
3035 gimple_stmt_iterator gsi
= gsi_for_stmt (vphi
);
3036 remove_phi_node (&gsi
, true);
3040 /* Now remove all the edges in the loop, except for those from the exit
3041 block and delete the blocks we elided. */
3042 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
3046 for (ei
= ei_start (bb
->preds
); (e
= ei_safe_edge (ei
));)
3048 if (e
->src
== exit_bb
)
3054 for (i
= 1; i
< orig_loop_num_nodes
; i
++)
3058 if (bb
== exit_bb
|| bb
== loop
->latch
)
3061 delete_basic_block (bb
);
3064 /* Re-connect the exit block. */
3065 if (exit_bb
!= NULL
)
3067 if (exit_bb
!= loop
->header
)
3069 /* Connect this node to loop header. */
3070 make_single_succ_edge (loop
->header
, exit_bb
, EDGE_FALLTHRU
);
3071 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, loop
->header
);
3074 /* Redirect non-exit edges to loop->latch. */
3075 FOR_EACH_EDGE (e
, ei
, exit_bb
->succs
)
3077 if (!loop_exit_edge_p (loop
, e
))
3078 redirect_edge_and_branch (e
, loop
->latch
);
3080 set_immediate_dominator (CDI_DOMINATORS
, loop
->latch
, exit_bb
);
3084 /* If the loop does not have an exit, reconnect header and latch. */
3085 make_edge (loop
->header
, loop
->latch
, EDGE_FALLTHRU
);
3086 set_immediate_dominator (CDI_DOMINATORS
, loop
->latch
, loop
->header
);
3089 /* If possible, merge loop header to the block with the exit edge.
3090 This reduces the number of basic blocks to two, to please the
3091 vectorizer that handles only loops with two nodes. */
3093 && exit_bb
!= loop
->header
)
3095 if (can_merge_blocks_p (loop
->header
, exit_bb
))
3096 merge_blocks (loop
->header
, exit_bb
);
3104 /* Version LOOP before if-converting it; the original loop
3105 will be if-converted, the new copy of the loop will not,
3106 and the LOOP_VECTORIZED internal call will be guarding which
3107 loop to execute. The vectorizer pass will fold this
3108 internal call into either true or false.
3110 Note that this function intentionally invalidates profile. Both edges
3111 out of LOOP_VECTORIZED must have 100% probability so the profile remains
3112 consistent after the condition is folded in the vectorizer. */
3115 version_loop_for_if_conversion (class loop
*loop
, vec
<gimple
*> *preds
)
3117 basic_block cond_bb
;
3118 tree cond
= make_ssa_name (boolean_type_node
);
3119 class loop
*new_loop
;
3121 gimple_stmt_iterator gsi
;
3122 unsigned int save_length
= 0;
3124 g
= gimple_build_call_internal (IFN_LOOP_VECTORIZED
, 2,
3125 build_int_cst (integer_type_node
, loop
->num
),
3127 gimple_call_set_lhs (g
, cond
);
3129 void **saved_preds
= NULL
;
3130 if (any_complicated_phi
|| need_to_predicate
)
3132 /* Save BB->aux around loop_version as that uses the same field. */
3133 save_length
= loop
->inner
? loop
->inner
->num_nodes
: loop
->num_nodes
;
3134 saved_preds
= XALLOCAVEC (void *, save_length
);
3135 for (unsigned i
= 0; i
< save_length
; i
++)
3136 saved_preds
[i
] = ifc_bbs
[i
]->aux
;
3139 initialize_original_copy_tables ();
3140 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
3141 is re-merged in the vectorizer. */
3142 new_loop
= loop_version (loop
, cond
, &cond_bb
,
3143 profile_probability::always (),
3144 profile_probability::always (),
3145 profile_probability::always (),
3146 profile_probability::always (), true);
3147 free_original_copy_tables ();
3149 if (any_complicated_phi
|| need_to_predicate
)
3150 for (unsigned i
= 0; i
< save_length
; i
++)
3151 ifc_bbs
[i
]->aux
= saved_preds
[i
];
3153 if (new_loop
== NULL
)
3156 new_loop
->dont_vectorize
= true;
3157 new_loop
->force_vectorize
= false;
3158 gsi
= gsi_last_bb (cond_bb
);
3159 gimple_call_set_arg (g
, 1, build_int_cst (integer_type_node
, new_loop
->num
));
3161 preds
->safe_push (g
);
3162 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3163 update_ssa (TODO_update_ssa_no_phi
);
3167 /* Return true when LOOP satisfies the follow conditions that will
3168 allow it to be recognized by the vectorizer for outer-loop
3170 - The loop is not the root node of the loop tree.
3171 - The loop has exactly one inner loop.
3172 - The loop has a single exit.
3173 - The loop header has a single successor, which is the inner
3175 - Each of the inner and outer loop latches have a single
3177 - The loop exit block has a single predecessor, which is the
3178 inner loop's exit block. */
3181 versionable_outer_loop_p (class loop
*loop
)
3183 if (!loop_outer (loop
)
3184 || loop
->dont_vectorize
3186 || loop
->inner
->next
3187 || !single_exit (loop
)
3188 || !single_succ_p (loop
->header
)
3189 || single_succ (loop
->header
) != loop
->inner
->header
3190 || !single_pred_p (loop
->latch
)
3191 || !single_pred_p (loop
->inner
->latch
))
3194 basic_block outer_exit
= single_pred (loop
->latch
);
3195 basic_block inner_exit
= single_pred (loop
->inner
->latch
);
3197 if (!single_pred_p (outer_exit
) || single_pred (outer_exit
) != inner_exit
)
3201 fprintf (dump_file
, "Found vectorizable outer loop for versioning\n");
3206 /* Performs splitting of critical edges. Skip splitting and return false
3207 if LOOP will not be converted because:
3209 - LOOP is not well formed.
3210 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3212 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3215 ifcvt_split_critical_edges (class loop
*loop
, bool aggressive_if_conv
)
3219 unsigned int num
= loop
->num_nodes
;
3223 auto_vec
<edge
> critical_edges
;
3225 /* Loop is not well formed. */
3229 body
= get_loop_body (loop
);
3230 for (i
= 0; i
< num
; i
++)
3233 if (!aggressive_if_conv
3235 && EDGE_COUNT (bb
->preds
) > MAX_PHI_ARG_NUM
)
3237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3239 "BB %d has complicated PHI with more than %u args.\n",
3240 bb
->index
, MAX_PHI_ARG_NUM
);
3245 if (bb
== loop
->latch
|| bb_with_exit_edge_p (loop
, bb
))
3248 /* Skip basic blocks not ending with conditional branch. */
3249 if (!safe_is_a
<gcond
*> (*gsi_last_bb (bb
)))
3252 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3253 if (EDGE_CRITICAL_P (e
) && e
->dest
->loop_father
== loop
)
3254 critical_edges
.safe_push (e
);
3258 while (critical_edges
.length () > 0)
3260 e
= critical_edges
.pop ();
3261 /* Don't split if bb can be predicated along non-critical edge. */
3262 if (EDGE_COUNT (e
->dest
->preds
) > 2 || all_preds_critical_p (e
->dest
))
3269 /* Delete redundant statements produced by predication which prevents
3270 loop vectorization. */
3273 ifcvt_local_dce (class loop
*loop
)
3278 gimple_stmt_iterator gsi
;
3279 auto_vec
<gimple
*> worklist
;
3280 enum gimple_code code
;
3281 use_operand_p use_p
;
3282 imm_use_iterator imm_iter
;
3284 /* The loop has a single BB only. */
3285 basic_block bb
= loop
->header
;
3286 tree latch_vdef
= NULL_TREE
;
3288 worklist
.create (64);
3289 /* Consider all phi as live statements. */
3290 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3292 phi
= gsi_stmt (gsi
);
3293 gimple_set_plf (phi
, GF_PLF_2
, true);
3294 worklist
.safe_push (phi
);
3295 if (virtual_operand_p (gimple_phi_result (phi
)))
3296 latch_vdef
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
3298 /* Consider load/store statements, CALL and COND as live. */
3299 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3301 stmt
= gsi_stmt (gsi
);
3302 if (is_gimple_debug (stmt
))
3304 gimple_set_plf (stmt
, GF_PLF_2
, true);
3307 if (gimple_store_p (stmt
) || gimple_assign_load_p (stmt
))
3309 gimple_set_plf (stmt
, GF_PLF_2
, true);
3310 worklist
.safe_push (stmt
);
3313 code
= gimple_code (stmt
);
3314 if (code
== GIMPLE_COND
|| code
== GIMPLE_CALL
)
3316 gimple_set_plf (stmt
, GF_PLF_2
, true);
3317 worklist
.safe_push (stmt
);
3320 gimple_set_plf (stmt
, GF_PLF_2
, false);
3322 if (code
== GIMPLE_ASSIGN
)
3324 tree lhs
= gimple_assign_lhs (stmt
);
3325 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
3327 stmt1
= USE_STMT (use_p
);
3328 if (!is_gimple_debug (stmt1
) && gimple_bb (stmt1
) != bb
)
3330 gimple_set_plf (stmt
, GF_PLF_2
, true);
3331 worklist
.safe_push (stmt
);
3337 /* Propagate liveness through arguments of live stmt. */
3338 while (worklist
.length () > 0)
3341 use_operand_p use_p
;
3344 stmt
= worklist
.pop ();
3345 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
3347 use
= USE_FROM_PTR (use_p
);
3348 if (TREE_CODE (use
) != SSA_NAME
)
3350 stmt1
= SSA_NAME_DEF_STMT (use
);
3351 if (gimple_bb (stmt1
) != bb
|| gimple_plf (stmt1
, GF_PLF_2
))
3353 gimple_set_plf (stmt1
, GF_PLF_2
, true);
3354 worklist
.safe_push (stmt1
);
3357 /* Delete dead statements. */
3358 gsi
= gsi_last_bb (bb
);
3359 while (!gsi_end_p (gsi
))
3361 gimple_stmt_iterator gsiprev
= gsi
;
3362 gsi_prev (&gsiprev
);
3363 stmt
= gsi_stmt (gsi
);
3364 if (gimple_store_p (stmt
) && gimple_vdef (stmt
))
3366 tree lhs
= gimple_get_lhs (stmt
);
3368 ao_ref_init (&write
, lhs
);
3370 if (dse_classify_store (&write
, stmt
, false, NULL
, NULL
, latch_vdef
)
3372 delete_dead_or_redundant_assignment (&gsi
, "dead");
3377 if (gimple_plf (stmt
, GF_PLF_2
))
3382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3384 fprintf (dump_file
, "Delete dead stmt in bb#%d\n", bb
->index
);
3385 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3387 gsi_remove (&gsi
, true);
3388 release_defs (stmt
);
3393 /* Return true if VALUE is already available on edge PE. */
3396 ifcvt_available_on_edge_p (edge pe
, tree value
)
3398 if (is_gimple_min_invariant (value
))
3401 if (TREE_CODE (value
) == SSA_NAME
)
3403 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (value
));
3404 if (!def_bb
|| dominated_by_p (CDI_DOMINATORS
, pe
->dest
, def_bb
))
3411 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3415 ifcvt_can_hoist (class loop
*loop
, edge pe
, gimple
*stmt
)
3417 if (auto *call
= dyn_cast
<gcall
*> (stmt
))
3419 if (gimple_call_internal_p (call
)
3420 && internal_fn_mask_index (gimple_call_internal_fn (call
)) >= 0)
3423 else if (auto *assign
= dyn_cast
<gassign
*> (stmt
))
3425 if (gimple_assign_rhs_code (assign
) == COND_EXPR
)
3431 if (gimple_has_side_effects (stmt
)
3432 || gimple_could_trap_p (stmt
)
3433 || stmt_could_throw_p (cfun
, stmt
)
3434 || gimple_vdef (stmt
)
3435 || gimple_vuse (stmt
))
3438 int num_args
= gimple_num_args (stmt
);
3439 if (pe
!= loop_preheader_edge (loop
))
3441 for (int i
= 0; i
< num_args
; ++i
)
3442 if (!ifcvt_available_on_edge_p (pe
, gimple_arg (stmt
, i
)))
3447 for (int i
= 0; i
< num_args
; ++i
)
3448 if (!expr_invariant_in_loop_p (loop
, gimple_arg (stmt
, i
)))
3455 /* Hoist invariant statements from LOOP to edge PE. */
3458 ifcvt_hoist_invariants (class loop
*loop
, edge pe
)
3460 gimple_stmt_iterator hoist_gsi
= {};
3461 unsigned int num_blocks
= loop
->num_nodes
;
3462 basic_block
*body
= get_loop_body (loop
);
3463 for (unsigned int i
= 0; i
< num_blocks
; ++i
)
3464 for (gimple_stmt_iterator gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);)
3466 gimple
*stmt
= gsi_stmt (gsi
);
3467 if (ifcvt_can_hoist (loop
, pe
, stmt
))
3469 /* Once we've hoisted one statement, insert other statements
3471 gsi_remove (&gsi
, false);
3473 gsi_insert_after (&hoist_gsi
, stmt
, GSI_NEW_STMT
);
3476 gsi_insert_on_edge_immediate (pe
, stmt
);
3477 hoist_gsi
= gsi_for_stmt (stmt
);
3486 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3487 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3488 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3489 if not NULL, will hold the tree representing the base struct of this
3493 get_bitfield_rep (gassign
*stmt
, bool write
, tree
*bitpos
,
3496 tree comp_ref
= write
? gimple_assign_lhs (stmt
)
3497 : gimple_assign_rhs1 (stmt
);
3499 tree field_decl
= TREE_OPERAND (comp_ref
, 1);
3500 tree ref_offset
= component_ref_field_offset (comp_ref
);
3501 tree rep_decl
= DECL_BIT_FIELD_REPRESENTATIVE (field_decl
);
3503 /* Bail out if the representative is not a suitable type for a scalar
3504 register variable. */
3505 if (!is_gimple_reg_type (TREE_TYPE (rep_decl
)))
3508 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3510 unsigned HOST_WIDE_INT bf_prec
3511 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt
)));
3512 if (compare_tree_int (DECL_SIZE (field_decl
), bf_prec
) != 0)
3515 if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl
)) != INTEGER_CST
3516 || TREE_CODE (ref_offset
) != INTEGER_CST
)
3518 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3519 fprintf (dump_file
, "\t Bitfield NOT OK to lower,"
3520 " offset is non-constant.\n");
3525 *struct_expr
= TREE_OPERAND (comp_ref
, 0);
3529 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3530 where our bitfield starts in relation to the container REP_DECL. The
3531 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3532 us how many bytes from the start of the structure there are until the
3533 start of the group of bitfield members the FIELD_DECL belongs to,
3534 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3535 position our actual bitfield member starts. For the container
3536 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3537 us the distance between the start of the structure and the start of
3538 the container, though the first is in bytes and the later other in
3539 bits. With this in mind we calculate the bit position of our new
3540 BITFIELD_REF by subtracting the number of bits between the start of
3541 the structure and the container from the number of bits from the start
3542 of the structure and the actual bitfield member. */
3543 tree bf_pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
3545 build_int_cst (bitsizetype
, BITS_PER_UNIT
));
3546 bf_pos
= fold_build2 (PLUS_EXPR
, bitsizetype
, bf_pos
,
3547 DECL_FIELD_BIT_OFFSET (field_decl
));
3548 tree rep_pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
3549 DECL_FIELD_OFFSET (rep_decl
),
3550 build_int_cst (bitsizetype
, BITS_PER_UNIT
));
3551 rep_pos
= fold_build2 (PLUS_EXPR
, bitsizetype
, rep_pos
,
3552 DECL_FIELD_BIT_OFFSET (rep_decl
));
3554 *bitpos
= fold_build2 (MINUS_EXPR
, bitsizetype
, bf_pos
, rep_pos
);
3561 /* Lowers the bitfield described by DATA.
3568 __ifc_1 = struct.<representative>;
3569 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3570 struct.<representative> = __ifc_2;
3578 __ifc_1 = struct.<representative>;
3579 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3581 where representative is a legal load that contains the bitfield value,
3582 bitsize is the size of the bitfield and bitpos the offset to the start of
3583 the bitfield within the representative. */
3586 lower_bitfield (gassign
*stmt
, bool write
)
3590 tree rep_decl
= get_bitfield_rep (stmt
, write
, &bitpos
, &struct_expr
);
3591 tree rep_type
= TREE_TYPE (rep_decl
);
3592 tree bf_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
3594 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3595 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3597 fprintf (dump_file
, "Lowering:\n");
3598 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3599 fprintf (dump_file
, "to:\n");
3602 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3603 defining SSA_NAME. */
3604 tree rep_comp_ref
= build3 (COMPONENT_REF
, rep_type
, struct_expr
, rep_decl
,
3606 tree new_val
= ifc_temp_var (rep_type
, rep_comp_ref
, &gsi
);
3608 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3609 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (new_val
), 0, TDF_SLIM
);
3613 new_val
= ifc_temp_var (rep_type
,
3614 build3 (BIT_INSERT_EXPR
, rep_type
, new_val
,
3615 unshare_expr (gimple_assign_rhs1 (stmt
)),
3618 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3619 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (new_val
), 0, TDF_SLIM
);
3621 gimple
*new_stmt
= gimple_build_assign (unshare_expr (rep_comp_ref
),
3623 gimple_move_vops (new_stmt
, stmt
);
3624 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
3626 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3627 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
3631 tree bfr
= build3 (BIT_FIELD_REF
, bf_type
, new_val
,
3632 build_int_cst (bitsizetype
, TYPE_PRECISION (bf_type
)),
3634 new_val
= ifc_temp_var (bf_type
, bfr
, &gsi
);
3636 gimple
*new_stmt
= gimple_build_assign (gimple_assign_lhs (stmt
),
3638 gimple_move_vops (new_stmt
, stmt
);
3639 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
3641 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3642 print_gimple_stmt (dump_file
, new_stmt
, 0, TDF_SLIM
);
3645 gsi_remove (&gsi
, true);
3648 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3649 with data structures representing these bitfields. */
/* (TO_LOWER refers to the READS_TO_LOWER and WRITES_TO_LOWER vectors:
   each lowerable bitfield read is pushed onto the former, each lowerable
   bitfield write onto the latter.)  */
3652 bitfields_to_lower_p (class loop
*loop
,
3653 vec
<gassign
*> &reads_to_lower
,
3654 vec
<gassign
*> &writes_to_lower
)
3656 gimple_stmt_iterator gsi
;
3658 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3660 fprintf (dump_file
, "Analyzing loop %d for bitfields:\n", loop
->num
);
/* Walk every statement of every basic block of LOOP; ifc_bbs was
   populated by the caller.  */
3663 for (unsigned i
= 0; i
< loop
->num_nodes
; ++i
)
3665 basic_block bb
= ifc_bbs
[i
];
3666 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
/* Only plain gimple assignments can be the bitfield accesses we
   handle; everything else is skipped.  */
3668 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
/* A COMPONENT_REF on the LHS means the statement writes a field;
   otherwise look at the first RHS operand for a field read.  */
3672 tree op
= gimple_assign_lhs (stmt
);
3673 bool write
= TREE_CODE (op
) == COMPONENT_REF
;
3676 op
= gimple_assign_rhs1 (stmt
);
3678 if (TREE_CODE (op
) != COMPONENT_REF
)
/* Only fields whose FIELD_DECL (TREE_OPERAND (op, 1)) carries a
   DECL_BIT_FIELD_TYPE are real bitfield accesses.  */
3681 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op
, 1)))
3683 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3684 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
/* We can only lower bitfields with an integral field type.
   NOTE(review): the dump message below says "NO OK" — an upstream
   message typo, deliberately kept (runtime string).  */
3686 if (!INTEGRAL_TYPE_P (TREE_TYPE (op
)))
3688 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3689 fprintf (dump_file
, "\t Bitfield NO OK to lower,"
3690 " field type is not Integral.\n");
/* Lowering needs a usable (non-BLKmode) representative for the
   field; get_bitfield_rep is queried here purely as a predicate
   (both out-parameters passed as NULL).  */
3694 if (!get_bitfield_rep (stmt
, write
, NULL
, NULL
))
3696 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3697 fprintf (dump_file
, "\t Bitfield NOT OK to lower,"
3698 " representative is BLKmode.\n");
/* Record the access so tree_if_conversion can lower it later.  */
3702 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3703 fprintf (dump_file
, "\tBitfield OK to lower.\n");
3705 writes_to_lower
.safe_push (stmt
);
3707 reads_to_lower
.safe_push (stmt
);
/* Lowering is only worthwhile if something was collected.  */
3711 return !reads_to_lower
.is_empty () || !writes_to_lower
.is_empty ();
3715 /* If-convert LOOP when it is legal. For the moment this pass has no
3716 profitability analysis. Returns non-zero todo flags when something
3720 tree_if_conversion (class loop
*loop
, vec
<gimple
*> *preds
)
3722 unsigned int todo
= 0;
3723 bool aggressive_if_conv
;
3725 auto_vec
<gassign
*, 4> reads_to_lower
;
3726 auto_vec
<gassign
*, 4> writes_to_lower
;
3729 auto_vec
<data_reference_p
, 10> refs
;
3734 need_to_lower_bitfields
= false;
3735 need_to_ifcvt
= false;
3736 need_to_predicate
= false;
3737 need_to_rewrite_undefined
= false;
3738 any_complicated_phi
= false;
3740 /* Apply more aggressive if-conversion when loop or its outer loop were
3741 marked with simd pragma. When that's the case, we try to if-convert
3742 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3743 aggressive_if_conv
= loop
->force_vectorize
;
3744 if (!aggressive_if_conv
)
3746 class loop
*outer_loop
= loop_outer (loop
);
3747 if (outer_loop
&& outer_loop
->force_vectorize
)
3748 aggressive_if_conv
= true;
3751 /* If there are more than two BBs in the loop then there is at least one if
3753 if (loop
->num_nodes
> 2
3754 && !ifcvt_split_critical_edges (loop
, aggressive_if_conv
))
3757 ifc_bbs
= get_loop_body_in_if_conv_order (loop
);
3760 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3761 fprintf (dump_file
, "Irreducible loop\n");
3765 if (find_data_references_in_loop (loop
, &refs
) == chrec_dont_know
)
3768 if (loop
->num_nodes
> 2)
3770 /* More than one loop exit is too much to handle. */
3771 if (!single_exit (loop
))
3773 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3774 fprintf (dump_file
, "Can not ifcvt due to multiple exits\n");
/* More than two basic blocks means there is conditional control flow
   we intend to flatten.  */
3778 need_to_ifcvt
= true;
/* Bail out if LOOP is not if-convertible or the debug counter says
   to skip it.  */
3780 if (!if_convertible_loop_p (loop
, &refs
)
3781 || !dbg_cnt (if_conversion_tree
))
/* Predication or complicated-PHI lowering is only worth doing in
   service of the vectorizer; give up when vectorization of LOOP is
   not enabled.  */
3784 if ((need_to_predicate
|| any_complicated_phi
)
3785 && ((!flag_tree_loop_vectorize
&& !loop
->force_vectorize
)
3786 || loop
->dont_vectorize
))
/* Collect bitfield accesses to lower, but only when the loop may be
   vectorized.  */
3791 if ((flag_tree_loop_vectorize
|| loop
->force_vectorize
)
3792 && !loop
->dont_vectorize
)
3793 need_to_lower_bitfields
= bitfields_to_lower_p (loop
, reads_to_lower
,
/* Nothing at all to do for this loop.  */
3796 if (!need_to_ifcvt
&& !need_to_lower_bitfields
)
3799 /* The edge to insert invariant stmts on. */
3800 pe
= loop_preheader_edge (loop
);
3802 /* Since we have no cost model, always version loops unless the user
3803 specified -ftree-loop-if-convert or unless versioning is required.
3804 Either version this loop, or if the pattern is right for outer-loop
3805 vectorization, version the outer loop. In the latter case we will
3806 still if-convert the original inner loop. */
3807 if (need_to_lower_bitfields
3808 || need_to_predicate
3809 || any_complicated_phi
3810 || flag_tree_loop_if_convert
!= 1)
3813 = (versionable_outer_loop_p (loop_outer (loop
))
3814 ? loop_outer (loop
) : loop
);
3815 class loop
*nloop
= version_loop_for_if_conversion (vloop
, preds
);
3820 /* If versionable_outer_loop_p decided to version the
3821 outer loop, version also the inner loop of the non-vectorized
3822 loop copy. So we transform:
3826 if (LOOP_VECTORIZED (1, 3))
3832 loop3 (copy of loop1)
3833 if (LOOP_VECTORIZED (4, 5))
3834 loop4 (copy of loop2)
3836 loop5 (copy of loop4) */
3837 gcc_assert (nloop
->inner
&& nloop
->inner
->next
== NULL
);
3838 rloop
= nloop
->inner
;
3841 /* If we versioned loop then make sure to insert invariant
3842 stmts before the .LOOP_VECTORIZED check since the vectorizer
3843 will re-use that for things like runtime alias versioning
3844 whose condition can end up using those invariants. */
3845 pe
= single_pred_edge (gimple_bb (preds
->last ()));
/* Lower the collected bitfield accesses in place: reads first, then
   writes, each popped off its worklist.  */
3848 if (need_to_lower_bitfields
)
3850 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3852 fprintf (dump_file
, "-------------------------\n");
3853 fprintf (dump_file
, "Start lowering bitfields\n");
3855 while (!reads_to_lower
.is_empty ())
3856 lower_bitfield (reads_to_lower
.pop (), false);
3857 while (!writes_to_lower
.is_empty ())
3858 lower_bitfield (writes_to_lower
.pop (), true);
3860 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3862 fprintf (dump_file
, "Done lowering bitfields\n");
3863 fprintf (dump_file
, "-------------------------\n");
3868 /* Before we rewrite edges we'll record their original position in the
3869 edge map such that we can map the edges between the ifcvt and the
3870 non-ifcvt loop during peeling. */
3872 for (edge exit
: get_loop_exit_edges (loop
))
3873 exit
->aux
= (void*)idx
++;
3875 /* Now all statements are if-convertible. Combine all the basic
3876 blocks into one huge basic block doing the if-conversion
3878 combine_blocks (loop
);
3881 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3882 and stores are involved. CSE only the loop body, not the entry
3883 PHIs, those are to be kept in sync with the non-if-converted copy.
3884 ??? We'll still keep dead stores though. */
3885 exit_bbs
= BITMAP_ALLOC (NULL
);
3886 for (edge exit
: get_loop_exit_edges (loop
))
3887 bitmap_set_bit (exit_bbs
, exit
->dest
->index
);
3888 bitmap_set_bit (exit_bbs
, loop
->latch
->index
);
/* Replace every SSA name recorded as redundant during if-conversion
   by its substitute before value numbering runs.  */
3890 std::pair
<tree
, tree
> *name_pair
;
3891 unsigned ssa_names_idx
;
3892 FOR_EACH_VEC_ELT (redundant_ssa_names
, ssa_names_idx
, name_pair
)
3893 replace_uses_by (name_pair
->first
, name_pair
->second
);
3894 redundant_ssa_names
.release ();
/* Value-number only the loop body: EXIT_BBS (exit destinations plus
   the latch) bounds the RPO walk.  */
3896 todo
|= do_rpo_vn (cfun
, loop_preheader_edge (loop
), exit_bbs
);
3898 /* Delete dead predicate computations. */
3899 ifcvt_local_dce (loop
);
3900 BITMAP_FREE (exit_bbs
);
/* Hoist invariants out of LOOP onto edge PE (preheader, or before the
   .LOOP_VECTORIZED check when the loop was versioned).  */
3902 ifcvt_hoist_invariants (loop
, pe
);
3904 todo
|= TODO_cleanup_cfg
;
/* Cleanup of the collected data references and of the per-basic-block
   predicates.  */
3907 data_reference_p dr
;
3909 for (i
= 0; refs
.iterate (i
, &dr
); i
++)
3920 for (i
= 0; i
< loop
->num_nodes
; i
++)
3921 free_bb_predicate (ifc_bbs
[i
]);
/* Empty the worklists but keep their storage so they can be reused.  */
3929 reads_to_lower
.truncate (0);
3930 writes_to_lower
.truncate (0);
3937 /* Tree if-conversion pass management. */
/* Static metadata describing the if-conversion pass to the pass manager:
   a GIMPLE pass timed under TV_TREE_LOOP_IFCVT that requires CFG and SSA
   form and neither provides nor destroys any properties.  */
3941 const pass_data pass_data_if_conversion
=
3943 GIMPLE_PASS
, /* type */
3945 OPTGROUP_NONE
, /* optinfo_flags */
3946 TV_TREE_LOOP_IFCVT
, /* tv_id */
3947 ( PROP_cfg
| PROP_ssa
), /* properties_required */
3948 0, /* properties_provided */
3949 0, /* properties_destroyed */
3950 0, /* todo_flags_start */
3951 0, /* todo_flags_finish */
/* Pass object wrapping tree if-conversion: GATE decides whether the pass
   should run on a function, EXECUTE performs the transformation.  */
3954 class pass_if_conversion
: public gimple_opt_pass
3957 pass_if_conversion (gcc::context
*ctxt
)
3958 : gimple_opt_pass (pass_data_if_conversion
, ctxt
)
3961 /* opt_pass methods: */
3962 bool gate (function
*) final override
;
3963 unsigned int execute (function
*) final override
;
3965 }; // class pass_if_conversion
3968 pass_if_conversion::gate (function
*fun
)
3970 return (((flag_tree_loop_vectorize
|| fun
->has_force_vectorize_loops
)
3971 && flag_tree_loop_if_convert
!= 0)
3972 || flag_tree_loop_if_convert
== 1);
/* Run if-conversion over every loop in FUN; returns TODO flags.  */
3976 pass_if_conversion::execute (function
*fun
)
/* Nothing to do without at least one real loop.  */
3980 if (number_of_loops (fun
) <= 1)
/* PREDS collects the internal calls emitted while versioning loops;
   they are inspected after the IL update below.  */
3983 auto_vec
<gimple
*> preds
;
/* If-convert each loop that is either explicitly requested via
   -ftree-loop-if-convert or a vectorization candidate.  */
3984 for (auto loop
: loops_list (cfun
, 0))
3985 if (flag_tree_loop_if_convert
== 1
3986 || ((flag_tree_loop_vectorize
|| loop
->force_vectorize
)
3987 && !loop
->dont_vectorize
))
3988 todo
|= tree_if_conversion (loop
, &preds
);
3992 free_numbers_of_iterations_estimates (fun
);
/* Verify no basic-block AUX fields were left set behind.  */
3999 FOR_EACH_BB_FN (bb
, fun
)
4000 gcc_assert (!bb
->aux
);
4003 /* Perform IL update now, it might elide some loops. */
4004 if (todo
& TODO_cleanup_cfg
)
4006 cleanup_tree_cfg ();
4007 if (need_ssa_update_p (fun
))
4008 todo
|= TODO_update_ssa
;
4010 if (todo
& TODO_update_ssa_any
)
4011 update_ssa (todo
& TODO_update_ssa_any
);
4013 /* If if-conversion elided the loop fall back to the original one. */
4014 for (unsigned i
= 0; i
< preds
.length (); ++i
)
4016 gimple
*g
= preds
[i
];
/* The call's arguments are the numbers of the if-converted loop and
   of the original loop it was versioned from.  */
4019 unsigned ifcvt_loop
= tree_to_uhwi (gimple_call_arg (g
, 0));
4020 unsigned orig_loop
= tree_to_uhwi (gimple_call_arg (g
, 1));
/* If either loop vanished during cleanup, fold the check to false so
   the original (non-if-converted) path is taken.  */
4021 if (!get_loop (fun
, ifcvt_loop
) || !get_loop (fun
, orig_loop
))
4024 fprintf (dump_file
, "If-converted loop vanished\n");
4025 fold_loop_internal_call (g
, boolean_false_node
);
4035 make_pass_if_conversion (gcc::context
*ctxt
)
4037 return new pass_if_conversion (ctxt
);