gcc/tree-if-conv.c
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2021 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "cfganal.h"
118 #include "internal-fn.h"
119 #include "fold-const.h"
120 #include "tree-ssa-sccvn.h"
121 #include "tree-cfgcleanup.h"
122 #include "tree-ssa-dse.h"
123 #include "tree-vectorizer.h"
124 #include "tree-eh.h"
126 /* Only handle PHIs with no more than MAX_PHI_ARG_NUM arguments unless
127 we are asked to by the simd pragma. */
128 #define MAX_PHI_ARG_NUM \
129 ((unsigned) param_max_tree_if_conversion_phi_args)
131 /* True if we've converted a statement that was only executed when some
132 condition C was true, and if for correctness we need to predicate the
133 statement to ensure that it is a no-op when C is false. See
134 predicate_statements for the kinds of predication we support. */
135 static bool need_to_predicate;
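/* For instance (an illustrative sketch, not from the original sources):
   a load that is only executed under a condition, such as

     if (c)
       x = *p;

   can only be if-converted by predicating it, e.g. by turning it into a
   masked load, so converting such a statement sets this flag.  */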
137 /* True if we have to rewrite stmts that may invoke undefined behavior
138 when a condition C is false, so that they do not invoke it once they become always executed.
139 See predicate_statements for the kinds of predication we support. */
140 static bool need_to_rewrite_undefined;
142 /* Indicate if there are any complicated PHIs that need to be handled in
143 if-conversion. A complicated PHI has more than two arguments and can't
144 be degenerated to a two-argument PHI. See more information in the comment
145 before phi_convertible_by_degenerating_args. */
146 static bool any_complicated_phi;
148 /* Hash for struct innermost_loop_behavior. It is the user's responsibility
149 to free the memory. */
151 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
153 static inline hashval_t hash (const value_type &);
154 static inline bool equal (const value_type &,
155 const compare_type &);
158 inline hashval_t
159 innermost_loop_behavior_hash::hash (const value_type &e)
161 hashval_t hash;
163 hash = iterative_hash_expr (e->base_address, 0);
164 hash = iterative_hash_expr (e->offset, hash);
165 hash = iterative_hash_expr (e->init, hash);
166 return iterative_hash_expr (e->step, hash);
169 inline bool
170 innermost_loop_behavior_hash::equal (const value_type &e1,
171 const compare_type &e2)
173 if ((e1->base_address && !e2->base_address)
174 || (!e1->base_address && e2->base_address)
175 || (!e1->offset && e2->offset)
176 || (e1->offset && !e2->offset)
177 || (!e1->init && e2->init)
178 || (e1->init && !e2->init)
179 || (!e1->step && e2->step)
180 || (e1->step && !e2->step))
181 return false;
183 if (e1->base_address && e2->base_address
184 && !operand_equal_p (e1->base_address, e2->base_address, 0))
185 return false;
186 if (e1->offset && e2->offset
187 && !operand_equal_p (e1->offset, e2->offset, 0))
188 return false;
189 if (e1->init && e2->init
190 && !operand_equal_p (e1->init, e2->init, 0))
191 return false;
192 if (e1->step && e2->step
193 && !operand_equal_p (e1->step, e2->step, 0))
194 return false;
196 return true;
199 /* List of basic blocks in if-conversion-suitable order. */
200 static basic_block *ifc_bbs;
202 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
203 static hash_map<innermost_loop_behavior_hash,
204 data_reference_p> *innermost_DR_map;
206 /* Hash table to store <base reference, DR> pairs. */
207 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
209 /* List of redundant SSA names: the first should be replaced by the second. */
210 static vec< std::pair<tree, tree> > redundant_ssa_names;
212 /* Structure used to predicate basic blocks. This is attached to the
213 ->aux field of the BBs in the loop to be if-converted. */
214 struct bb_predicate {
216 /* The condition under which this basic block is executed. */
217 tree predicate;
219 /* PREDICATE is gimplified, and the sequence of statements is
220 recorded here, in order to avoid the duplication of computations
221 that occur in previous conditions. See PR44483. */
222 gimple_seq predicate_gimplified_stmts;
225 /* Returns true when the basic block BB has a predicate. */
227 static inline bool
228 bb_has_predicate (basic_block bb)
230 return bb->aux != NULL;
233 /* Returns the gimplified predicate for basic block BB. */
235 static inline tree
236 bb_predicate (basic_block bb)
238 return ((struct bb_predicate *) bb->aux)->predicate;
241 /* Sets the gimplified predicate COND for basic block BB. */
243 static inline void
244 set_bb_predicate (basic_block bb, tree cond)
246 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
247 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
248 || is_gimple_condexpr (cond));
249 ((struct bb_predicate *) bb->aux)->predicate = cond;
252 /* Returns the sequence of statements of the gimplification of the
253 predicate for basic block BB. */
255 static inline gimple_seq
256 bb_predicate_gimplified_stmts (basic_block bb)
258 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
261 /* Sets the sequence of statements STMTS of the gimplification of the
262 predicate for basic block BB. */
264 static inline void
265 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
267 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
270 /* Adds the sequence of statements STMTS to the sequence of statements
271 of the predicate for basic block BB. */
273 static inline void
274 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
276 /* We might have updated some stmts in STMTS via force_gimple_operand
277 calling fold_stmt and that producing multiple stmts. Delink immediate
278 uses so update_ssa after loop versioning doesn't get confused for
279 the not yet inserted predicates.
280 ??? This should go away once we reliably avoid updating stmts
281 not in any BB. */
282 for (gimple_stmt_iterator gsi = gsi_start (stmts);
283 !gsi_end_p (gsi); gsi_next (&gsi))
285 gimple *stmt = gsi_stmt (gsi);
286 delink_stmt_imm_use (stmt);
287 gimple_set_modified (stmt, true);
289 gimple_seq_add_seq_without_update
290 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
293 /* Initializes to TRUE the predicate of basic block BB. */
295 static inline void
296 init_bb_predicate (basic_block bb)
298 bb->aux = XNEW (struct bb_predicate);
299 set_bb_predicate_gimplified_stmts (bb, NULL);
300 set_bb_predicate (bb, boolean_true_node);
303 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
305 static inline void
306 release_bb_predicate (basic_block bb)
308 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
309 if (stmts)
311 /* Ensure that these stmts haven't yet been added to a bb. */
312 if (flag_checking)
313 for (gimple_stmt_iterator i = gsi_start (stmts);
314 !gsi_end_p (i); gsi_next (&i))
315 gcc_assert (! gimple_bb (gsi_stmt (i)));
317 /* Discard them. */
318 gimple_seq_discard (stmts);
319 set_bb_predicate_gimplified_stmts (bb, NULL);
323 /* Free the predicate of basic block BB. */
325 static inline void
326 free_bb_predicate (basic_block bb)
328 if (!bb_has_predicate (bb))
329 return;
331 release_bb_predicate (bb);
332 free (bb->aux);
333 bb->aux = NULL;
336 /* Reinitialize predicate of BB with the true predicate. */
338 static inline void
339 reset_bb_predicate (basic_block bb)
341 if (!bb_has_predicate (bb))
342 init_bb_predicate (bb);
343 else
345 release_bb_predicate (bb);
346 set_bb_predicate (bb, boolean_true_node);
350 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
351 the expression EXPR. Inserts the statement created for this
352 computation before GSI and leaves the iterator GSI at the same
353 statement. */
355 static tree
356 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
358 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
359 gimple *stmt = gimple_build_assign (new_name, expr);
360 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
361 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
362 return new_name;
365 /* Return true when COND is a false predicate. */
367 static inline bool
368 is_false_predicate (tree cond)
370 return (cond != NULL_TREE
371 && (cond == boolean_false_node
372 || integer_zerop (cond)));
375 /* Return true when COND is a true predicate. */
377 static inline bool
378 is_true_predicate (tree cond)
380 return (cond == NULL_TREE
381 || cond == boolean_true_node
382 || integer_onep (cond));
385 /* Returns true when BB has a predicate that is not trivial: true or
386 NULL_TREE. */
388 static inline bool
389 is_predicated (basic_block bb)
391 return !is_true_predicate (bb_predicate (bb));
394 /* Parses the predicate COND and returns its comparison code and
395 operands OP0 and OP1. */
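/* For example (illustrative, not from the original sources): given the
   GIMPLE definition

     _5 = a_2 < b_3;

   parse_predicate (_5, &op0, &op1) returns LT_EXPR with *OP0 == a_2 and
   *OP1 == b_3.  If COND is neither a comparison nor defined by one,
   ERROR_MARK is returned.  */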
397 static enum tree_code
398 parse_predicate (tree cond, tree *op0, tree *op1)
400 gimple *s;
402 if (TREE_CODE (cond) == SSA_NAME
403 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
405 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
407 *op0 = gimple_assign_rhs1 (s);
408 *op1 = gimple_assign_rhs2 (s);
409 return gimple_assign_rhs_code (s);
412 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
414 tree op = gimple_assign_rhs1 (s);
415 tree type = TREE_TYPE (op);
416 enum tree_code code = parse_predicate (op, op0, op1);
418 return code == ERROR_MARK ? ERROR_MARK
419 : invert_tree_comparison (code, HONOR_NANS (type));
422 return ERROR_MARK;
425 if (COMPARISON_CLASS_P (cond))
427 *op0 = TREE_OPERAND (cond, 0);
428 *op1 = TREE_OPERAND (cond, 1);
429 return TREE_CODE (cond);
432 return ERROR_MARK;
435 /* Returns the fold of predicate C1 OR C2 at location LOC. */
437 static tree
438 fold_or_predicates (location_t loc, tree c1, tree c2)
440 tree op1a, op1b, op2a, op2b;
441 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
442 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
444 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
446 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
447 code2, op2a, op2b);
448 if (t)
449 return t;
452 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
455 /* Returns either a COND_EXPR or the folded expression if the folded
456 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
457 a constant or a SSA_NAME. */
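/* For instance (an illustrative sketch): with TYPE == int, COND == x_1 > y_2,
   RHS == x_1 and LHS == y_2, fold_ternary can fold the COND_EXPR
   "x_1 > y_2 ? x_1 : y_2" into MAX_EXPR <x_1, y_2>, and that folded
   expression is returned instead of a COND_EXPR.  */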
459 static tree
460 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
462 tree rhs1, lhs1, cond_expr;
464 /* If COND is the comparison r != 0 and r has boolean type, convert COND
465 to the SSA_NAME r so it is accepted by the vectorizer's bool pattern. */
466 if (TREE_CODE (cond) == NE_EXPR)
468 tree op0 = TREE_OPERAND (cond, 0);
469 tree op1 = TREE_OPERAND (cond, 1);
470 if (TREE_CODE (op0) == SSA_NAME
471 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
472 && (integer_zerop (op1)))
473 cond = op0;
475 cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
477 if (cond_expr == NULL_TREE)
478 return build3 (COND_EXPR, type, cond, rhs, lhs);
480 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
482 if (is_gimple_val (cond_expr))
483 return cond_expr;
485 if (TREE_CODE (cond_expr) == ABS_EXPR)
487 rhs1 = TREE_OPERAND (cond_expr, 1);
488 STRIP_USELESS_TYPE_CONVERSION (rhs1);
489 if (is_gimple_val (rhs1))
490 return build1 (ABS_EXPR, type, rhs1);
493 if (TREE_CODE (cond_expr) == MIN_EXPR
494 || TREE_CODE (cond_expr) == MAX_EXPR)
496 lhs1 = TREE_OPERAND (cond_expr, 0);
497 STRIP_USELESS_TYPE_CONVERSION (lhs1);
498 rhs1 = TREE_OPERAND (cond_expr, 1);
499 STRIP_USELESS_TYPE_CONVERSION (rhs1);
500 if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
501 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
503 return build3 (COND_EXPR, type, cond, rhs, lhs);
506 /* Add condition NC to the predicate list of basic block BB. LOOP is
507 the loop to be if-converted. Use predicate of cd-equivalent block
508 for join bb if it exists: we call basic blocks bb1 and bb2
509 cd-equivalent if they are executed under the same condition. */
511 static inline void
512 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
514 tree bc, *tp;
515 basic_block dom_bb;
517 if (is_true_predicate (nc))
518 return;
520 /* If dominance tells us this basic block is always executed,
521 don't record any predicates for it. */
522 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
523 return;
525 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
526 /* We use the notion of cd-equivalence to get a simpler predicate for
527 the join block, e.g. if the join block has 2 predecessors with predicates
528 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
529 p1 & p2 | p1 & !p2. */
530 if (dom_bb != loop->header
531 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
533 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
534 bc = bb_predicate (dom_bb);
535 if (!is_true_predicate (bc))
536 set_bb_predicate (bb, bc);
537 else
538 gcc_assert (is_true_predicate (bb_predicate (bb)));
539 if (dump_file && (dump_flags & TDF_DETAILS))
540 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
541 dom_bb->index, bb->index);
542 return;
545 if (!is_predicated (bb))
546 bc = nc;
547 else
549 bc = bb_predicate (bb);
550 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
551 if (is_true_predicate (bc))
553 reset_bb_predicate (bb);
554 return;
558 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
559 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
560 tp = &TREE_OPERAND (bc, 0);
561 else
562 tp = &bc;
563 if (!is_gimple_condexpr (*tp))
565 gimple_seq stmts;
566 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
567 add_bb_predicate_gimplified_stmts (bb, stmts);
569 set_bb_predicate (bb, bc);
572 /* Add the condition COND to the previous condition PREV_COND, and add
573 this to the predicate list of the destination of edge E. LOOP is
574 the loop to be if-converted. */
576 static void
577 add_to_dst_predicate_list (class loop *loop, edge e,
578 tree prev_cond, tree cond)
580 if (!flow_bb_inside_loop_p (loop, e->dest))
581 return;
583 if (!is_true_predicate (prev_cond))
584 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
585 prev_cond, cond);
587 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
588 add_to_predicate_list (loop, e->dest, cond);
591 /* Return true if one of the successor edges of BB exits LOOP. */
593 static bool
594 bb_with_exit_edge_p (class loop *loop, basic_block bb)
596 edge e;
597 edge_iterator ei;
599 FOR_EACH_EDGE (e, ei, bb->succs)
600 if (loop_exit_edge_p (loop, e))
601 return true;
603 return false;
606 /* Given PHI which has more than two arguments, this function checks if
607 it's if-convertible by degenerating its arguments. Specifically, if
608 the two conditions below are satisfied:
610 1) The number of distinct PHI argument values equals 2 and one of the
611 values occurs exactly once.
612 2) The edge corresponding to that unique argument is not a critical edge.
614 Such a PHI can be handled as if it had only two arguments. For example,
615 below PHI:
617 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
619 can be transformed into:
621 res = (predicate of e3) ? A_2 : A_1;
623 Return TRUE if it is the case, FALSE otherwise. */
625 static bool
626 phi_convertible_by_degenerating_args (gphi *phi)
628 edge e;
629 tree arg, t1 = NULL, t2 = NULL;
630 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
631 unsigned int num_args = gimple_phi_num_args (phi);
633 gcc_assert (num_args > 2);
635 for (i = 0; i < num_args; i++)
637 arg = gimple_phi_arg_def (phi, i);
638 if (t1 == NULL || operand_equal_p (t1, arg, 0))
640 n1++;
641 i1 = i;
642 t1 = arg;
644 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
646 n2++;
647 i2 = i;
648 t2 = arg;
650 else
651 return false;
654 if (n1 != 1 && n2 != 1)
655 return false;
657 /* Check if the edge corresponding to the unique arg is critical. */
658 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
659 if (EDGE_COUNT (e->src->succs) > 1)
660 return false;
662 return true;
665 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
666 and it belongs to basic block BB. Note that at this point it is already
667 known that PHI is if-convertible. This function updates the global variable
668 ANY_COMPLICATED_PHI if PHI is complicated. */
670 static bool
671 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
673 if (dump_file && (dump_flags & TDF_DETAILS))
675 fprintf (dump_file, "-------------------------\n");
676 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
679 if (bb != loop->header
680 && gimple_phi_num_args (phi) > 2
681 && !phi_convertible_by_degenerating_args (phi))
682 any_complicated_phi = true;
684 return true;
687 /* Records the status of a data reference. This struct is attached to
688 each DR->aux field. */
690 struct ifc_dr {
691 bool rw_unconditionally;
692 bool w_unconditionally;
693 bool written_at_least_once;
695 tree rw_predicate;
696 tree w_predicate;
697 tree base_w_predicate;
700 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
701 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
702 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
703 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
705 /* Iterates over DRs and stores <ref, DR> and <base ref, DR> pairs in
706 hash tables. While storing them in the hash tables, it checks if the
707 reference is unconditionally read or written and records that as flag
708 information. For the base reference it checks if it is written at least
709 once unconditionally and records that as flag information along with the DR.
710 In other words for every data reference A in STMT there exist other
711 accesses to a data reference with the same base with predicates that
712 add up (OR-up) to the true predicate: this ensures that the data
713 reference A is touched (read or written) on every iteration of the
714 if-converted loop. */
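/* As an illustrative example (not from the original sources): in

     for (i = 0; i < n; i++)
       if (c[i])
         A[i] = B[i];
       else
         A[i] = 0;

   both stores to A[i] share the same innermost loop behavior, and their
   predicates c[i] != 0 and !(c[i] != 0) OR up to the true predicate, so
   the master DR for A[i] is marked as written unconditionally.  */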
715 static void
716 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
719 data_reference_p *master_dr, *base_master_dr;
720 tree base_ref = DR_BASE_OBJECT (a);
721 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
722 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
723 bool exist1, exist2;
725 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
726 if (!exist1)
727 *master_dr = a;
729 if (DR_IS_WRITE (a))
731 IFC_DR (*master_dr)->w_predicate
732 = fold_or_predicates (UNKNOWN_LOCATION, ca,
733 IFC_DR (*master_dr)->w_predicate);
734 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
735 DR_W_UNCONDITIONALLY (*master_dr) = true;
737 IFC_DR (*master_dr)->rw_predicate
738 = fold_or_predicates (UNKNOWN_LOCATION, ca,
739 IFC_DR (*master_dr)->rw_predicate);
740 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
741 DR_RW_UNCONDITIONALLY (*master_dr) = true;
743 if (DR_IS_WRITE (a))
745 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
746 if (!exist2)
747 *base_master_dr = a;
748 IFC_DR (*base_master_dr)->base_w_predicate
749 = fold_or_predicates (UNKNOWN_LOCATION, ca,
750 IFC_DR (*base_master_dr)->base_w_predicate);
751 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
752 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
756 /* Return TRUE if we can prove that the index IDX of an array reference REF
757 is within the array bounds. Return FALSE otherwise. */
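/* For instance (an illustrative sketch): in

     int A[64];
     for (int i = 0; i < 64; i++)
       if (c[i])
         A[i] = 0;

   the index i starts at 0, steps by 1, and the loop iterates at most 64
   times, so the access A[i] provably stays within [0, 63] and this
   function returns true for it.  */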
759 static bool
760 idx_within_array_bound (tree ref, tree *idx, void *dta)
762 wi::overflow_type overflow;
763 widest_int niter, valid_niter, delta, wi_step;
764 tree ev, init, step;
765 tree low, high;
766 class loop *loop = (class loop*) dta;
768 /* Only support within-bound access for array references. */
769 if (TREE_CODE (ref) != ARRAY_REF)
770 return false;
772 /* For arrays at the end of the structure, we are not guaranteed that they
773 do not really extend over their declared size. However, for arrays of
774 size greater than one, this is unlikely to be intended. */
775 if (array_at_struct_end_p (ref))
776 return false;
778 ev = analyze_scalar_evolution (loop, *idx);
779 ev = instantiate_parameters (loop, ev);
780 init = initial_condition (ev);
781 step = evolution_part_in_loop_num (ev, loop->num);
783 if (!init || TREE_CODE (init) != INTEGER_CST
784 || (step && TREE_CODE (step) != INTEGER_CST))
785 return false;
787 low = array_ref_low_bound (ref);
788 high = array_ref_up_bound (ref);
790 /* The case of nonconstant bounds could be handled, but it would be
791 complicated. */
792 if (TREE_CODE (low) != INTEGER_CST
793 || !high || TREE_CODE (high) != INTEGER_CST)
794 return false;
796 /* Check if the initial idx is within bound. */
797 if (wi::to_widest (init) < wi::to_widest (low)
798 || wi::to_widest (init) > wi::to_widest (high))
799 return false;
801 /* The idx is always within bound. */
802 if (!step || integer_zerop (step))
803 return true;
805 if (!max_loop_iterations (loop, &niter))
806 return false;
808 if (wi::to_widest (step) < 0)
810 delta = wi::to_widest (init) - wi::to_widest (low);
811 wi_step = -wi::to_widest (step);
813 else
815 delta = wi::to_widest (high) - wi::to_widest (init);
816 wi_step = wi::to_widest (step);
819 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
820 /* The iteration space of idx is within array bound. */
821 if (!overflow && niter <= valid_niter)
822 return true;
824 return false;
827 /* Return TRUE if ref is a within bound array reference. */
829 static bool
830 ref_within_array_bound (gimple *stmt, tree ref)
832 class loop *loop = loop_containing_stmt (stmt);
834 gcc_assert (loop != NULL);
835 return for_each_index (&ref, idx_within_array_bound, loop);
839 /* Given a memory reference expression T, return TRUE if base object
840 it refers to is writable. The base object of a memory reference
841 is the main object being referenced, which is returned by function
842 get_base_address. */
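/* For example (illustrative): given

     static const int tab[16];

   a reference tab[i] has base object "tab", which is TREE_READONLY, so
   this function returns false for it.  */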
844 static bool
845 base_object_writable (tree ref)
847 tree base_tree = get_base_address (ref);
849 return (base_tree
850 && DECL_P (base_tree)
851 && decl_binds_to_current_def_p (base_tree)
852 && !TREE_READONLY (base_tree));
855 /* Return true when the memory references of STMT won't trap in the
856 if-converted code. There are two things that we have to check for:
858 - writes to memory occur to writable memory: if-conversion of
859 memory writes transforms the conditional memory writes into
860 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
861 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
862 be executed at all in the original code, it may be a readonly
863 memory. To check that A is not const-qualified, we check that
864 there exists at least an unconditional write to A in the current
865 function.
867 - reads or writes to memory are valid memory accesses for every
868 iteration. To check that the memory accesses are correctly formed
869 and that we are allowed to read and write in these locations, we
870 check that the memory accesses to be if-converted occur at every
871 iteration unconditionally.
873 Returns true for the memory reference in STMT if the same memory reference
874 is read or written unconditionally at least once and the base memory
875 reference is written unconditionally at least once; this checks that the
876 reference will not fault on write. Also returns true if the memory
877 reference is unconditionally read at least once while we are conditionally
878 writing to memory that is known to be both read and written and is bound
879 to the definition we are seeing. */
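/* A small illustrative example (not from the original sources): in

     for (i = 0; i < n; i++)
       {
         A[i] = 0;
         if (c[i])
           A[i] = B[i];
       }

   the conditional store to A[i] is safe to if-convert because the same
   reference is written unconditionally earlier in the iteration, so
   DR_W_UNCONDITIONALLY holds for its master DR and this function returns
   true without having to rely on flag_store_data_races.  */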
880 static bool
881 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
883 /* If DR didn't see a reference here we can't use it to tell
884 whether the ref traps or not. */
885 if (gimple_uid (stmt) == 0)
886 return false;
888 data_reference_p *master_dr, *base_master_dr;
889 data_reference_p a = drs[gimple_uid (stmt) - 1];
891 tree base = DR_BASE_OBJECT (a);
892 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
894 gcc_assert (DR_STMT (a) == stmt);
895 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
896 || DR_INIT (a) || DR_STEP (a));
898 master_dr = innermost_DR_map->get (innermost);
899 gcc_assert (master_dr != NULL);
901 base_master_dr = baseref_DR_map->get (base);
903 /* If a is unconditionally written to it doesn't trap. */
904 if (DR_W_UNCONDITIONALLY (*master_dr))
905 return true;
907 /* If a is unconditionally accessed then ...
909 Even if A is accessed conditionally, we can treat it as an unconditional
910 access if it's an array reference and all of its indices are within the
911 array bounds. */
912 if (DR_RW_UNCONDITIONALLY (*master_dr)
913 || ref_within_array_bound (stmt, DR_REF (a)))
915 /* an unconditional read won't trap. */
916 if (DR_IS_READ (a))
917 return true;
919 /* An unconditional write won't trap if the base is written
920 to unconditionally. */
921 if (base_master_dr
922 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
923 return flag_store_data_races;
924 /* or the base is known to be not readonly. */
925 else if (base_object_writable (DR_REF (a)))
926 return flag_store_data_races;
929 return false;
932 /* Return true if STMT could be converted into a masked load or store
933 (conditional load or store based on a mask computed from bb predicate). */
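/* For instance (illustrative and target dependent): on a target where
   can_vec_mask_load_store_p holds for SImode, a conditional load like

     if (c)
       x = A[i];

   can later be emitted as a masked-load internal function call along the
   lines of

     x = .MASK_LOAD (&A[i], align, mask);

   where the mask is derived from the block's predicate.  */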
935 static bool
936 ifcvt_can_use_mask_load_store (gimple *stmt)
938 /* Check whether this is a load or store. */
939 tree lhs = gimple_assign_lhs (stmt);
940 bool is_load;
941 tree ref;
942 if (gimple_store_p (stmt))
944 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
945 return false;
946 is_load = false;
947 ref = lhs;
949 else if (gimple_assign_load_p (stmt))
951 is_load = true;
952 ref = gimple_assign_rhs1 (stmt);
954 else
955 return false;
957 if (may_be_nonaddressable_p (ref))
958 return false;
960 /* Mask should be integer mode of the same size as the load/store
961 mode. */
962 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
963 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
964 return false;
966 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
967 return true;
969 return false;
972 /* Return true if STMT could be converted from an operation that is
973 unconditional to one that is conditional on a bb predicate mask. */
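/* For example (an illustrative sketch): an integer division that is only
   executed under a condition, e.g.

     if (b != 0)
       x = a / b;

   could trap if executed unconditionally.  On targets that provide the
   corresponding conditional optab (IFN_COND_DIV, e.g. AArch64 SVE), the
   division can instead be rewritten as a conditional internal function
   call whose mask is the block's predicate, so this function returns
   true for it.  */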
975 static bool
976 ifcvt_can_predicate (gimple *stmt)
978 basic_block bb = gimple_bb (stmt);
980 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
981 || bb->loop_father->dont_vectorize
982 || gimple_has_volatile_ops (stmt))
983 return false;
985 if (gimple_assign_single_p (stmt))
986 return ifcvt_can_use_mask_load_store (stmt);
988 tree_code code = gimple_assign_rhs_code (stmt);
989 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
990 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
991 if (!types_compatible_p (lhs_type, rhs_type))
992 return false;
993 internal_fn cond_fn = get_conditional_internal_fn (code);
994 return (cond_fn != IFN_LAST
995 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
998 /* Return true when STMT is if-convertible.
1000 GIMPLE_ASSIGN statement is not if-convertible if,
1001 - it is not movable,
1002 - it could trap,
1003 - LHS is not var decl. */
1005 static bool
1006 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1007 vec<data_reference_p> refs)
1009 tree lhs = gimple_assign_lhs (stmt);
1011 if (dump_file && (dump_flags & TDF_DETAILS))
1013 fprintf (dump_file, "-------------------------\n");
1014 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1017 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1018 return false;
1020 /* Some of these constraints might be too conservative. */
1021 if (stmt_ends_bb_p (stmt)
1022 || gimple_has_volatile_ops (stmt)
1023 || (TREE_CODE (lhs) == SSA_NAME
1024 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1025 || gimple_has_side_effects (stmt))
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1029 return false;
1032 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
1033 in between if_convertible_loop_p and combine_blocks
1034 we can perform loop versioning. */
1035 gimple_set_plf (stmt, GF_PLF_2, false);
1037 if ((! gimple_vuse (stmt)
1038 || gimple_could_trap_p_1 (stmt, false, false)
1039 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1040 && gimple_could_trap_p (stmt))
1042 if (ifcvt_can_predicate (stmt))
1044 gimple_set_plf (stmt, GF_PLF_2, true);
1045 need_to_predicate = true;
1046 return true;
1048 if (dump_file && (dump_flags & TDF_DETAILS))
1049 fprintf (dump_file, "tree could trap...\n");
1050 return false;
1052 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1053 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1054 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1055 && arith_code_with_undefined_signed_overflow
1056 (gimple_assign_rhs_code (stmt)))
1057 /* We have to rewrite stmts with undefined overflow. */
1058 need_to_rewrite_undefined = true;
1060 /* When if-converting stores, force versioning; likewise if we
1061 end up generating store data races. */
1062 if (gimple_vdef (stmt))
1063 need_to_predicate = true;
1065 return true;
1068 /* Return true when STMT is if-convertible.
1070 A statement is if-convertible if:
1071 - it is an if-convertible GIMPLE_ASSIGN,
1072 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1073 - it is a builtin call. */
1075 static bool
1076 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1078 switch (gimple_code (stmt))
1080 case GIMPLE_LABEL:
1081 case GIMPLE_DEBUG:
1082 case GIMPLE_COND:
1083 return true;
1085 case GIMPLE_ASSIGN:
1086 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1088 case GIMPLE_CALL:
1090 tree fndecl = gimple_call_fndecl (stmt);
1091 if (fndecl)
1093 int flags = gimple_call_flags (stmt);
1094 if ((flags & ECF_CONST)
1095 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1096 /* We can only vectorize some builtins at the moment,
1097 so restrict if-conversion to those. */
1098 && fndecl_built_in_p (fndecl))
1099 return true;
1101 return false;
1104 default:
1105 /* Don't know what to do with 'em so don't do anything. */
1106 if (dump_file && (dump_flags & TDF_DETAILS))
1108 fprintf (dump_file, "don't know what to do\n");
1109 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1111 return false;
1115 /* Assumes that BB has more than one predecessor.
1116 Returns false if at least one predecessor edge of BB is not a critical
1117 edge, and true otherwise. */
1119 static inline bool
1120 all_preds_critical_p (basic_block bb)
1122 edge e;
1123 edge_iterator ei;
1125 FOR_EACH_EDGE (e, ei, bb->preds)
1126 if (EDGE_COUNT (e->src->succs) == 1)
1127 return false;
1128 return true;
1131 /* Return true when BB is if-convertible. This routine does not check
1132 basic block's statements and phis.
1134 A basic block is not if-convertible if:
1135 - it is non-empty and it is after the exit block (in BFS order),
1136 - it is after the exit block but before the latch,
1137 - its edges are not normal.
1139 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1140 inside LOOP. */
1142 static bool
1143 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1145 edge e;
1146 edge_iterator ei;
1148 if (dump_file && (dump_flags & TDF_DETAILS))
1149 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1151 if (EDGE_COUNT (bb->succs) > 2)
1152 return false;
1154 gimple *last = last_stmt (bb);
1155 if (gcall *call = safe_dyn_cast <gcall *> (last))
1156 if (gimple_call_ctrl_altering_p (call))
1157 return false;
1159 if (exit_bb)
1161 if (bb != loop->latch)
1163 if (dump_file && (dump_flags & TDF_DETAILS))
1164 fprintf (dump_file, "basic block after exit bb but before latch\n");
1165 return false;
1167 else if (!empty_block_p (bb))
1169 if (dump_file && (dump_flags & TDF_DETAILS))
1170 fprintf (dump_file, "non empty basic block after exit bb\n");
1171 return false;
1173 else if (bb == loop->latch
1174 && bb != exit_bb
1175 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1177 if (dump_file && (dump_flags & TDF_DETAILS))
1178 fprintf (dump_file, "latch is not dominated by exit_block\n");
1179 return false;
1183 /* Be less adventurous and handle only normal edges. */
1184 FOR_EACH_EDGE (e, ei, bb->succs)
1185 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1187 if (dump_file && (dump_flags & TDF_DETAILS))
1188 fprintf (dump_file, "Difficult to handle edges\n");
1189 return false;
1192 return true;
1195 /* Return true when all predecessor blocks of BB are visited. The
1196 VISITED bitmap keeps track of the visited blocks. */
1198 static bool
1199 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1201 edge e;
1202 edge_iterator ei;
1203 FOR_EACH_EDGE (e, ei, bb->preds)
1204 if (!bitmap_bit_p (*visited, e->src->index))
1205 return false;
1207 return true;
1210 /* Get body of a LOOP in suitable order for if-conversion. It is
1211 the caller's responsibility to deallocate the basic block list.
1212 The if-conversion-suitable order is breadth-first search (BFS) order
1213 with an additional constraint: select a block only if all its
1214 predecessors are already selected. */
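/* For instance (an illustrative sketch): if the loop header branches to
   blocks A and D, A branches to B, and B branches to D, then plain BFS
   lists D before B even though B is one of D's predecessors; the extra
   constraint postpones D, giving the order header, A, B, D, latch.  */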
1216 static basic_block *
1217 get_loop_body_in_if_conv_order (const class loop *loop)
1219 basic_block *blocks, *blocks_in_bfs_order;
1220 basic_block bb;
1221 bitmap visited;
1222 unsigned int index = 0;
1223 unsigned int visited_count = 0;
1225 gcc_assert (loop->num_nodes);
1226 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1228 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1229 visited = BITMAP_ALLOC (NULL);
1231 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1233 index = 0;
1234 while (index < loop->num_nodes)
1236 bb = blocks_in_bfs_order [index];
1238 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1240 free (blocks_in_bfs_order);
1241 BITMAP_FREE (visited);
1242 free (blocks);
1243 return NULL;
1246 if (!bitmap_bit_p (visited, bb->index))
1248 if (pred_blocks_visited_p (bb, &visited)
1249 || bb == loop->header)
1251 /* This block is now visited. */
1252 bitmap_set_bit (visited, bb->index);
1253 blocks[visited_count++] = bb;
1257 index++;
1259 if (index == loop->num_nodes
1260 && visited_count != loop->num_nodes)
1261 /* Not done yet. */
1262 index = 0;
1264 free (blocks_in_bfs_order);
1265 BITMAP_FREE (visited);
1266 return blocks;
1269 /* Returns true when the analysis of the predicates for all the basic
1270 blocks in LOOP succeeded.
1272 predicate_bbs first allocates the predicates of the basic blocks.
1273 These fields are then initialized with the tree expressions
1274 representing the predicates under which a basic block is executed
1275 in the LOOP. As the loop->header is executed at each iteration, it
1276 has the "true" predicate. Other statements executed under a
1277 condition are predicated with that condition, for example
1279 | if (x)
1280 | S1;
1281 | else
1282 | S2;
1284 S1 will be predicated with "x", and
1285 S2 will be predicated with "!x". */
1287 static void
1288 predicate_bbs (loop_p loop)
1290 unsigned int i;
1292 for (i = 0; i < loop->num_nodes; i++)
1293 init_bb_predicate (ifc_bbs[i]);
1295 for (i = 0; i < loop->num_nodes; i++)
1297 basic_block bb = ifc_bbs[i];
1298 tree cond;
1299 gimple *stmt;
1301 /* The loop latch and loop exit block are always executed and
1302 have no extra conditions to be processed: skip them. */
1303 if (bb == loop->latch
1304 || bb_with_exit_edge_p (loop, bb))
1306 reset_bb_predicate (bb);
1307 continue;
1310 cond = bb_predicate (bb);
1311 stmt = last_stmt (bb);
1312 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1314 tree c2;
1315 edge true_edge, false_edge;
1316 location_t loc = gimple_location (stmt);
1317 tree c = build2_loc (loc, gimple_cond_code (stmt),
1318 boolean_type_node,
1319 gimple_cond_lhs (stmt),
1320 gimple_cond_rhs (stmt));
1322 /* Add new condition into destination's predicate list. */
1323 extract_true_false_edges_from_block (gimple_bb (stmt),
1324 &true_edge, &false_edge);
1326 /* If C is true, then TRUE_EDGE is taken. */
1327 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1328 unshare_expr (c));
1330 /* If C is false, then FALSE_EDGE is taken. */
1331 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1332 unshare_expr (c));
1333 add_to_dst_predicate_list (loop, false_edge,
1334 unshare_expr (cond), c2);
1336 cond = NULL_TREE;
1339 /* If current bb has only one successor, then consider it as an
1340 unconditional goto. */
1341 if (single_succ_p (bb))
1343 basic_block bb_n = single_succ (bb);
1345 /* The successor bb inherits the predicate of its
1346 predecessor. If there is no predicate in the predecessor
1347 bb, then consider the successor bb as always executed. */
1348 if (cond == NULL_TREE)
1349 cond = boolean_true_node;
1351 add_to_predicate_list (loop, bb_n, cond);
1355 /* The loop header is always executed. */
1356 reset_bb_predicate (loop->header);
1357 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1358 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1361 /* Build region by adding loop pre-header and post-header blocks. */
1363 static vec<basic_block>
1364 build_region (class loop *loop)
1366 vec<basic_block> region = vNULL;
1367 basic_block exit_bb = NULL;
1369 gcc_assert (ifc_bbs);
1370 /* The first element is loop pre-header. */
1371 region.safe_push (loop_preheader_edge (loop)->src);
1373 for (unsigned int i = 0; i < loop->num_nodes; i++)
1375 basic_block bb = ifc_bbs[i];
1376 region.safe_push (bb);
1377 /* Find loop postheader. */
1378 edge e;
1379 edge_iterator ei;
1380 FOR_EACH_EDGE (e, ei, bb->succs)
1381 if (loop_exit_edge_p (loop, e))
1383 exit_bb = e->dest;
1384 break;
1387 /* The last element is loop post-header. */
1388 gcc_assert (exit_bb);
1389 region.safe_push (exit_bb);
1390 return region;
1393 /* Return true when LOOP is if-convertible. This is a helper function
1394 for if_convertible_loop_p. REFS is initialized and freed
1395 in if_convertible_loop_p. */
1397 static bool
1398 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1400 unsigned int i;
1401 basic_block exit_bb = NULL;
1402 vec<basic_block> region;
1404 if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1405 return false;
1407 calculate_dominance_info (CDI_DOMINATORS);
1409 /* Allow statements that can be handled during if-conversion. */
1410 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1411 if (!ifc_bbs)
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file, "Irreducible loop\n");
1415 return false;
1418 for (i = 0; i < loop->num_nodes; i++)
1420 basic_block bb = ifc_bbs[i];
1422 if (!if_convertible_bb_p (loop, bb, exit_bb))
1423 return false;
1425 if (bb_with_exit_edge_p (loop, bb))
1426 exit_bb = bb;
1429 for (i = 0; i < loop->num_nodes; i++)
1431 basic_block bb = ifc_bbs[i];
1432 gimple_stmt_iterator gsi;
1434 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1435 switch (gimple_code (gsi_stmt (gsi)))
1437 case GIMPLE_LABEL:
1438 case GIMPLE_ASSIGN:
1439 case GIMPLE_CALL:
1440 case GIMPLE_DEBUG:
1441 case GIMPLE_COND:
1442 gimple_set_uid (gsi_stmt (gsi), 0);
1443 break;
1444 default:
1445 return false;
1449 data_reference_p dr;
1451 innermost_DR_map
1452 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1453 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1455 /* Compute post-dominator tree locally. */
1456 region = build_region (loop);
1457 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1459 predicate_bbs (loop);
1461 /* Free post-dominator tree since it is not used after predication. */
1462 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1463 region.release ();
1465 for (i = 0; refs->iterate (i, &dr); i++)
1467 tree ref = DR_REF (dr);
1469 dr->aux = XNEW (struct ifc_dr);
1470 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1471 DR_RW_UNCONDITIONALLY (dr) = false;
1472 DR_W_UNCONDITIONALLY (dr) = false;
1473 IFC_DR (dr)->rw_predicate = boolean_false_node;
1474 IFC_DR (dr)->w_predicate = boolean_false_node;
1475 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1476 if (gimple_uid (DR_STMT (dr)) == 0)
1477 gimple_set_uid (DR_STMT (dr), i + 1);
1479 /* If DR doesn't have innermost loop behavior or it's a compound
1480 memory reference, we synthesize its innermost loop behavior
1481 for hashing. */
1482 if (TREE_CODE (ref) == COMPONENT_REF
1483 || TREE_CODE (ref) == IMAGPART_EXPR
1484 || TREE_CODE (ref) == REALPART_EXPR
1485 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1486 || DR_INIT (dr) || DR_STEP (dr)))
1488 while (TREE_CODE (ref) == COMPONENT_REF
1489 || TREE_CODE (ref) == IMAGPART_EXPR
1490 || TREE_CODE (ref) == REALPART_EXPR)
1491 ref = TREE_OPERAND (ref, 0);
1493 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1494 DR_BASE_ADDRESS (dr) = ref;
1496 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1499 for (i = 0; i < loop->num_nodes; i++)
1501 basic_block bb = ifc_bbs[i];
1502 gimple_stmt_iterator itr;
1504 /* Check the if-convertibility of statements in predicated BBs. */
1505 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1506 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1507 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1508 return false;
1511 /* Checking PHIs needs to be done after stmts, as the fact whether there
1512 are any masked loads or stores affects the tests. */
1513 for (i = 0; i < loop->num_nodes; i++)
1515 basic_block bb = ifc_bbs[i];
1516 gphi_iterator itr;
1518 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1519 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1520 return false;
1523 if (dump_file)
1524 fprintf (dump_file, "Applying if-conversion\n");
1526 return true;
1529 /* Return true when LOOP is if-convertible.
1530 LOOP is if-convertible if:
1531 - it is innermost,
1532 - it has two or more basic blocks,
1533 - it has only one exit,
1534 - none of its header's successor edges is a loop exit edge,
1535 - its basic blocks and phi nodes are if-convertible. */
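/* A typical candidate (an illustrative source-level example):

     for (int i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i] + 1;

   an innermost loop with a single exit whose body contains a condition,
   which is exactly the shape the checks below accept.  */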
1537 static bool
1538 if_convertible_loop_p (class loop *loop)
1540 edge e;
1541 edge_iterator ei;
1542 bool res = false;
1543 vec<data_reference_p> refs;
1545 /* Handle only innermost loop. */
1546 if (!loop || loop->inner)
1548 if (dump_file && (dump_flags & TDF_DETAILS))
1549 fprintf (dump_file, "not innermost loop\n");
1550 return false;
1553 /* If only one block, no need for if-conversion. */
1554 if (loop->num_nodes <= 2)
1556 if (dump_file && (dump_flags & TDF_DETAILS))
1557 fprintf (dump_file, "less than 2 basic blocks\n");
1558 return false;
1561 /* More than one loop exit is too much to handle. */
1562 if (!single_exit (loop))
1564 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, "multiple exits\n");
1566 return false;
1569 /* If one of the loop header's edge is an exit edge then do not
1570 apply if-conversion. */
1571 FOR_EACH_EDGE (e, ei, loop->header->succs)
1572 if (loop_exit_edge_p (loop, e))
1573 return false;
1575 refs.create (5);
1576 res = if_convertible_loop_p_1 (loop, &refs);
1578 data_reference_p dr;
1579 unsigned int i;
1580 for (i = 0; refs.iterate (i, &dr); i++)
1581 free (dr->aux);
1583 free_data_refs (refs);
1585 delete innermost_DR_map;
1586 innermost_DR_map = NULL;
1588 delete baseref_DR_map;
1589 baseref_DR_map = NULL;
1591 return res;
1594 /* Return reduc_1 if has_nop.
1596 if (...)
1597 tmp1 = (unsigned type) reduc_1;
1598 tmp2 = tmp1 + rhs2;
1599 reduc_3 = (signed type) tmp2. */
1600 static tree
1601 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1603 if (!has_nop)
1604 return op;
1606 if (TREE_CODE (op) != SSA_NAME)
1607 return NULL_TREE;
1609 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1610 if (!stmt
1611 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1612 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1613 (gimple_assign_rhs1 (stmt))))
1614 return NULL_TREE;
1616 return gimple_assign_rhs1 (stmt);
1619 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1620 which is in predicated basic block.
1621 In fact, we search for the following PHI pattern:
1622 loop-header:
1623 reduc_1 = PHI <..., reduc_2>
1625 if (...)
1626 reduc_3 = ...
1627 reduc_2 = PHI <reduc_1, reduc_3>
1629 ARG_0 and ARG_1 are the corresponding PHI arguments.
1630 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1631 EXTENDED is true if PHI has > 2 arguments. */
1633 static bool
1634 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1635 tree *op0, tree *op1, bool extended, bool* has_nop,
1636 gimple **nop_reduc)
1638 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1639 gimple *stmt;
1640 gimple *header_phi = NULL;
1641 enum tree_code reduction_op;
1642 basic_block bb = gimple_bb (phi);
1643 class loop *loop = bb->loop_father;
1644 edge latch_e = loop_latch_edge (loop);
1645 imm_use_iterator imm_iter;
1646 use_operand_p use_p;
1647 edge e;
1648 edge_iterator ei;
1649 bool result = *has_nop = false;
1650 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1651 return false;
1653 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1655 lhs = arg_1;
1656 header_phi = SSA_NAME_DEF_STMT (arg_0);
1657 stmt = SSA_NAME_DEF_STMT (arg_1);
1659 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1661 lhs = arg_0;
1662 header_phi = SSA_NAME_DEF_STMT (arg_1);
1663 stmt = SSA_NAME_DEF_STMT (arg_0);
1665 else
1666 return false;
1667 if (gimple_bb (header_phi) != loop->header)
1668 return false;
1670 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1671 return false;
1673 if (gimple_code (stmt) != GIMPLE_ASSIGN
1674 || gimple_has_volatile_ops (stmt))
1675 return false;
1677 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1678 return false;
1680 if (!is_predicated (gimple_bb (stmt)))
1681 return false;
1683 /* Check that stmt-block is predecessor of phi-block. */
1684 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1685 if (e->dest == bb)
1687 result = true;
1688 break;
1690 if (!result)
1691 return false;
1693 if (!has_single_use (lhs))
1694 return false;
1696 reduction_op = gimple_assign_rhs_code (stmt);
1698 /* Catch something like below
1700 loop-header:
1701 reduc_1 = PHI <..., reduc_2>
1703 if (...)
1704 tmp1 = (unsigned type) reduc_1;
1705 tmp2 = tmp1 + rhs2;
1706 reduc_3 = (signed type) tmp2;
1708 reduc_2 = PHI <reduc_1, reduc_3>
1710 and convert to
1712 reduc_2 = PHI <0, reduc_3>
1713 tmp1 = (unsigned type) reduc_1;
1714 ifcvt = cond_expr ? rhs2 : 0
1715 tmp2 = tmp1 +/- ifcvt;
1716 reduc_1 = (signed type) tmp2; */
1718 if (CONVERT_EXPR_CODE_P (reduction_op))
1720 lhs = gimple_assign_rhs1 (stmt);
1721 if (TREE_CODE (lhs) != SSA_NAME
1722 || !has_single_use (lhs))
1723 return false;
1725 *nop_reduc = stmt;
1726 stmt = SSA_NAME_DEF_STMT (lhs);
1727 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1728 || !is_gimple_assign (stmt))
1729 return false;
1731 *has_nop = true;
1732 reduction_op = gimple_assign_rhs_code (stmt);
1735 if (reduction_op != PLUS_EXPR
1736 && reduction_op != MINUS_EXPR
1737 && reduction_op != BIT_IOR_EXPR
1738 && reduction_op != BIT_XOR_EXPR
1739 && reduction_op != BIT_AND_EXPR)
1740 return false;
1741 r_op1 = gimple_assign_rhs1 (stmt);
1742 r_op2 = gimple_assign_rhs2 (stmt);
1744 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1745 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1747 /* Make R_OP1 hold the reduction variable. */
1748 if (r_nop2 == PHI_RESULT (header_phi)
1749 && commutative_tree_code (reduction_op))
1751 std::swap (r_op1, r_op2);
1752 std::swap (r_nop1, r_nop2);
1754 else if (r_nop1 != PHI_RESULT (header_phi))
1755 return false;
1757 if (*has_nop)
1759 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1760 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1762 gimple *use_stmt = USE_STMT (use_p);
1763 if (is_gimple_debug (use_stmt))
1764 continue;
1765 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1766 continue;
1767 if (use_stmt != phi)
1768 return false;
1772 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1773 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1775 gimple *use_stmt = USE_STMT (use_p);
1776 if (is_gimple_debug (use_stmt))
1777 continue;
1778 if (use_stmt == stmt)
1779 continue;
1780 if (gimple_code (use_stmt) != GIMPLE_PHI)
1781 return false;
1784 *op0 = r_op1; *op1 = r_op2;
1785 *reduc = stmt;
1786 return true;
1789 /* Converts conditional scalar reduction into unconditional form, e.g.
1790 bb_4
1791 if (_5 != 0) goto bb_5 else goto bb_6
1792 end_bb_4
1793 bb_5
1794 res_6 = res_13 + 1;
1795 end_bb_5
1796 bb_6
1797 # res_2 = PHI <res_13(4), res_6(5)>
1798 end_bb_6
1800 will be converted into sequence
1801 _ifc__1 = _5 != 0 ? 1 : 0;
1802 res_2 = res_13 + _ifc__1;
1803 Argument SWAP indicates whether the arguments of the conditional
1804 expression should be swapped.
1805 Returns rhs of resulting PHI assignment. */
1807 static tree
1808 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1809 tree cond, tree op0, tree op1, bool swap,
1810 bool has_nop, gimple* nop_reduc)
1812 gimple_stmt_iterator stmt_it;
1813 gimple *new_assign;
1814 tree rhs;
1815 tree rhs1 = gimple_assign_rhs1 (reduc);
1816 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1817 tree c;
1818 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
1819 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1820 gimple_seq stmts = NULL;
1822 if (dump_file && (dump_flags & TDF_DETAILS))
1824 fprintf (dump_file, "Found cond scalar reduction.\n");
1825 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1828 /* Build cond expression using COND and constant operand
1829 of reduction rhs. */
1830 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1831 unshare_expr (cond),
1832 swap ? op_nochange : op1,
1833 swap ? op1 : op_nochange);
1835 /* Create assignment stmt and insert it at GSI. */
1836 new_assign = gimple_build_assign (tmp, c);
1837 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1838 /* Build rhs for unconditional increment/decrement/logic_operation. */
1839 rhs = gimple_build (&stmts, reduction_op,
1840 TREE_TYPE (rhs1), op0, tmp);
1842 if (has_nop)
1844 rhs = gimple_convert (&stmts,
1845 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1846 stmt_it = gsi_for_stmt (nop_reduc);
1847 gsi_remove (&stmt_it, true);
1848 release_defs (nop_reduc);
1850 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1852 /* Delete original reduction stmt. */
1853 stmt_it = gsi_for_stmt (reduc);
1854 gsi_remove (&stmt_it, true);
1855 release_defs (reduc);
1856 return rhs;
1859 /* Produce condition for all occurrences of ARG in PHI node. */
1861 static tree
1862 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1863 gimple_stmt_iterator *gsi)
1865 int len;
1866 int i;
1867 tree cond = NULL_TREE;
1868 tree c;
1869 edge e;
1871 len = occur->length ();
1872 gcc_assert (len > 0);
1873 for (i = 0; i < len; i++)
1875 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1876 c = bb_predicate (e->src);
1877 if (is_true_predicate (c))
1879 cond = c;
1880 break;
1882 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1883 is_gimple_condexpr, NULL_TREE,
1884 true, GSI_SAME_STMT);
1885 if (cond != NULL_TREE)
1887 /* Must build OR expression. */
1888 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1889 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1890 is_gimple_condexpr, NULL_TREE,
1891 true, GSI_SAME_STMT);
1893 else
1894 cond = c;
1896 gcc_assert (cond != NULL_TREE);
1897 return cond;
1900 /* Local valueization callback that follows all-use SSA edges. */
1902 static tree
1903 ifcvt_follow_ssa_use_edges (tree val)
1905 return val;
1908 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1909 This routine can handle PHI nodes with more than two arguments.
1911 For example,
1912 S1: A = PHI <x1(1), x2(5)>
1913 is converted into,
1914 S2: A = cond ? x1 : x2;
1916 The generated code is inserted at GSI that points to the top of
1917 basic block's statement list.
1918 If the PHI node has more than two arguments, a chain of conditional
1919 expressions is produced. */
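/* For a PHI with three arguments (an illustrative sketch):

     A = PHI <x1(e1), x2(e2), x3(e3)>

   with predicates p1, p2 and p3 on the incoming blocks, the generated
   chain looks roughly like

     _t = p2 ? x2 : x3;
     A  = p1 ? x1 : _t;

   the argument value with the most occurrences is placed last so that it
   ends up in the innermost "else" position.  */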
1922 static void
1923 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1925 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1926 tree rhs, res, arg0, arg1, op0, op1, scev;
1927 tree cond;
1928 unsigned int index0;
1929 unsigned int max, args_len;
1930 edge e;
1931 basic_block bb;
1932 unsigned int i;
1933 bool has_nop;
1935 res = gimple_phi_result (phi);
1936 if (virtual_operand_p (res))
1937 return;
1939 if ((rhs = degenerate_phi_result (phi))
1940 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1941 res))
1942 && !chrec_contains_undetermined (scev)
1943 && scev != res
1944 && (rhs = gimple_phi_arg_def (phi, 0))))
1946 if (dump_file && (dump_flags & TDF_DETAILS))
1948 fprintf (dump_file, "Degenerate phi!\n");
1949 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1951 new_stmt = gimple_build_assign (res, rhs);
1952 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1953 update_stmt (new_stmt);
1954 return;
1957 bb = gimple_bb (phi);
1958 if (EDGE_COUNT (bb->preds) == 2)
1960 /* Predicate ordinary PHI node with 2 arguments. */
1961 edge first_edge, second_edge;
1962 basic_block true_bb;
1963 first_edge = EDGE_PRED (bb, 0);
1964 second_edge = EDGE_PRED (bb, 1);
1965 cond = bb_predicate (first_edge->src);
1966 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1967 std::swap (first_edge, second_edge);
1968 if (EDGE_COUNT (first_edge->src->succs) > 1)
1970 cond = bb_predicate (second_edge->src);
1971 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1972 cond = TREE_OPERAND (cond, 0);
1973 else
1974 first_edge = second_edge;
1976 else
1977 cond = bb_predicate (first_edge->src);
1978 /* Gimplify the condition to a valid cond-expr conditional operand. */
1979 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1980 is_gimple_condexpr, NULL_TREE,
1981 true, GSI_SAME_STMT);
1982 true_bb = first_edge->src;
1983 if (EDGE_PRED (bb, 1)->src == true_bb)
1985 arg0 = gimple_phi_arg_def (phi, 1);
1986 arg1 = gimple_phi_arg_def (phi, 0);
1988 else
1990 arg0 = gimple_phi_arg_def (phi, 0);
1991 arg1 = gimple_phi_arg_def (phi, 1);
1993 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1994 &op0, &op1, false, &has_nop,
1995 &nop_reduc))
1997 /* Convert reduction stmt into vectorizable form. */
1998 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1999 true_bb != gimple_bb (reduc),
2000 has_nop, nop_reduc);
2001 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2003 else
2004 /* Build new RHS using selected condition and arguments. */
2005 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2006 arg0, arg1);
2007 new_stmt = gimple_build_assign (res, rhs);
2008 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2009 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2010 if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
2012 new_stmt = gsi_stmt (new_gsi);
2013 update_stmt (new_stmt);
2016 if (dump_file && (dump_flags & TDF_DETAILS))
2018 fprintf (dump_file, "new phi replacement stmt\n");
2019 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2021 return;
2024 /* Create a hash map for the PHI node, mapping each distinct argument
2025 value to the vector of argument indexes having that value. */
2026 bool swap = false;
2027 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2028 unsigned int num_args = gimple_phi_num_args (phi);
2029 int max_ind = -1;
2030 /* Vector of different PHI argument values. */
2031 auto_vec<tree> args (num_args);
2033 /* Compute phi_arg_map. */
2034 for (i = 0; i < num_args; i++)
2036 tree arg;
2038 arg = gimple_phi_arg_def (phi, i);
2039 if (!phi_arg_map.get (arg))
2040 args.quick_push (arg);
2041 phi_arg_map.get_or_insert (arg).safe_push (i);
2044 /* Determine element with max number of occurrences. */
2045 max_ind = -1;
2046 max = 1;
2047 args_len = args.length ();
2048 for (i = 0; i < args_len; i++)
2050 unsigned int len;
2051 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2053 max_ind = (int) i;
2054 max = len;
2058 /* Put the element with the max number of occurrences at the end of ARGS. */
2059 if (max_ind != -1 && max_ind + 1 != (int) args_len)
2060 std::swap (args[args_len - 1], args[max_ind]);
2062 /* Handle one special case when the number of arguments with different values
2063 is equal to 2 and one argument has only a single occurrence. Such a PHI can be
2064 handled as if it had only 2 arguments. */
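/* For instance (illustrative only), A_1 = PHI <x_2(3), y_4(5), y_4(6)>
   has two distinct argument values and x_2 occurs exactly once, so it
   is predicated like a two-argument PHI: the predicate of the edge
   from block 3 (possibly stripped of an outer negation, with the
   operands swapped instead) selects between x_2 and y_4.  */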
2065 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2067 vec<int> *indexes;
2068 indexes = phi_arg_map.get (args[0]);
2069 index0 = (*indexes)[0];
2070 arg0 = args[0];
2071 arg1 = args[1];
2072 e = gimple_phi_arg_edge (phi, index0);
2073 cond = bb_predicate (e->src);
2074 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2076 swap = true;
2077 cond = TREE_OPERAND (cond, 0);
2079 /* Gimplify the condition to a valid cond-expr conditional operand. */
2080 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
2081 is_gimple_condexpr, NULL_TREE,
2082 true, GSI_SAME_STMT);
2083 if (!(is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2084 &op0, &op1, true, &has_nop, &nop_reduc)))
2085 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2086 swap ? arg1 : arg0,
2087 swap ? arg0 : arg1);
2088 else
2090 /* Convert reduction stmt into vectorizable form. */
2091 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2092 swap, has_nop, nop_reduc);
2093 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2095 new_stmt = gimple_build_assign (res, rhs);
2096 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2097 update_stmt (new_stmt);
2099 else
2101 /* Common case. */
2102 vec<int> *indexes;
2103 tree type = TREE_TYPE (gimple_phi_result (phi));
2104 tree lhs;
2105 arg1 = args[1];
2106 for (i = 0; i < args_len; i++)
2108 arg0 = args[i];
2109 indexes = phi_arg_map.get (args[i]);
2110 if (i != args_len - 1)
2111 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2112 else
2113 lhs = res;
2114 cond = gen_phi_arg_condition (phi, indexes, gsi);
2115 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2116 arg0, arg1);
2117 new_stmt = gimple_build_assign (lhs, rhs);
2118 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2119 update_stmt (new_stmt);
2120 arg1 = lhs;
2124 if (dump_file && (dump_flags & TDF_DETAILS))
2126 fprintf (dump_file, "new extended phi replacement stmt\n");
2127 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2131 /* Replaces in LOOP all the scalar phi nodes other than those in the
2132 LOOP->header block with conditional modify expressions. */
2134 static void
2135 predicate_all_scalar_phis (class loop *loop)
2137 basic_block bb;
2138 unsigned int orig_loop_num_nodes = loop->num_nodes;
2139 unsigned int i;
2141 for (i = 1; i < orig_loop_num_nodes; i++)
2143 gphi *phi;
2144 gimple_stmt_iterator gsi;
2145 gphi_iterator phi_gsi;
2146 bb = ifc_bbs[i];
2148 if (bb == loop->header)
2149 continue;
2151 phi_gsi = gsi_start_phis (bb);
2152 if (gsi_end_p (phi_gsi))
2153 continue;
2155 gsi = gsi_after_labels (bb);
2156 while (!gsi_end_p (phi_gsi))
2158 phi = phi_gsi.phi ();
2159 if (virtual_operand_p (gimple_phi_result (phi)))
2160 gsi_next (&phi_gsi);
2161 else
2163 predicate_scalar_phi (phi, &gsi);
2164 remove_phi_node (&phi_gsi, false);
2170 /* Insert in each basic block of LOOP the statements produced by the
2171 gimplification of the predicates. */
2173 static void
2174 insert_gimplified_predicates (loop_p loop)
2176 unsigned int i;
2178 for (i = 0; i < loop->num_nodes; i++)
2180 basic_block bb = ifc_bbs[i];
2181 gimple_seq stmts;
2182 if (!is_predicated (bb))
2183 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2184 if (!is_predicated (bb))
2186 /* Do not insert statements for a basic block that is not
2187 predicated. Also make sure that the predicate of the
2188 basic block is set to true. */
2189 reset_bb_predicate (bb);
2190 continue;
2193 stmts = bb_predicate_gimplified_stmts (bb);
2194 if (stmts)
2196 if (need_to_predicate)
2198 /* Insert the predicate of the BB just after the label,
2199 as the if-conversion of memory writes will use this
2200 predicate. */
2201 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2202 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2204 else
2206 /* Insert the predicate of the BB at the end of the BB
2207 as this would reduce the register pressure: the only
2208 use of this predicate will be in successor BBs. */
2209 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2211 if (gsi_end_p (gsi)
2212 || stmt_ends_bb_p (gsi_stmt (gsi)))
2213 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2214 else
2215 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2218 /* Once the sequence is code generated, set it to NULL. */
2219 set_bb_predicate_gimplified_stmts (bb, NULL);
2224 /* Helper function for predicate_statements. Returns the index of an existing
2225 mask if one was created for the given SIZE, and -1 otherwise. */
2227 static int
2228 mask_exists (int size, const vec<int> &vec)
2230 unsigned int ix;
2231 int v;
2232 FOR_EACH_VEC_ELT (vec, ix, v)
2233 if (v == size)
2234 return (int) ix;
2235 return -1;
2238 /* Helper function for predicate_statements. STMT is a memory read or
2239 write and it needs to be predicated by MASK. Return a statement
2240 that does so. */
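/* A sketch of the rewrite (illustrative only; the names are invented):

     lhs_1 = *p_2;     becomes   lhs_1 = .MASK_LOAD (addr, align, MASK);
     *p_2 = rhs_3;     becomes   .MASK_STORE (addr, align, MASK, rhs_3);

   where ADDR is the gimplified address of the memory reference and
   ALIGN is a constant of the reference's alias pointer type encoding
   its alignment.  */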
2242 static gimple *
2243 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2245 gcall *new_stmt;
2247 tree lhs = gimple_assign_lhs (stmt);
2248 tree rhs = gimple_assign_rhs1 (stmt);
2249 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2250 mark_addressable (ref);
2251 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2252 true, NULL_TREE, true, GSI_SAME_STMT);
2253 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2254 get_object_alignment (ref));
2255 /* Copy points-to info if possible. */
2256 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2257 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2258 ref);
2259 if (TREE_CODE (lhs) == SSA_NAME)
2261 new_stmt
2262 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2263 ptr, mask);
2264 gimple_call_set_lhs (new_stmt, lhs);
2265 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2267 else
2269 new_stmt
2270 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2271 mask, rhs);
2272 gimple_move_vops (new_stmt, stmt);
2274 gimple_call_set_nothrow (new_stmt, true);
2275 return new_stmt;
2278 /* STMT uses OP_LHS. Check whether it is equivalent to:
2280 ... = OP_MASK ? OP_LHS : X;
2282 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2283 known to have value OP_COND. */
2285 static tree
2286 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2287 tree op_lhs)
2289 gassign *assign = dyn_cast <gassign *> (stmt);
2290 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2291 return NULL_TREE;
2293 tree use_cond = gimple_assign_rhs1 (assign);
2294 tree if_true = gimple_assign_rhs2 (assign);
2295 tree if_false = gimple_assign_rhs3 (assign);
2297 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2298 && if_true == op_lhs)
2299 return if_false;
2301 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2302 return if_true;
2304 return NULL_TREE;
2307 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2308 the set of SSA names defined earlier in STMT's block. */
2310 static bool
2311 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2312 tree value)
2314 if (is_gimple_min_invariant (value))
2315 return true;
2317 if (TREE_CODE (value) == SSA_NAME)
2319 if (SSA_NAME_IS_DEFAULT_DEF (value))
2320 return true;
2322 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2323 basic_block use_bb = gimple_bb (stmt);
2324 return (def_bb == use_bb
2325 ? ssa_names->contains (value)
2326 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2329 return false;
2332 /* Helper function for predicate_statements. STMT is a potentially-trapping
2333 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2334 has value COND. Return a statement that does so. SSA_NAMES is the set of
2335 SSA names defined earlier in STMT's block. */
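/* For example (an illustrative sketch; the names are invented), a
   possibly-trapping division guarded by MASK

     x_1 = a_2 / b_3;

   becomes a call to the matching conditional internal function

     x_1 = .COND_DIV (MASK, a_2, b_3, ELSE);

   where ELSE is either recovered from a redundant "MASK ? x_1 : ELSE"
   COND_EXPR use of x_1 or supplied by targetm.preferred_else_value.  */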
2337 static gimple *
2338 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2339 hash_set<tree_ssa_name_hash> *ssa_names)
2341 tree lhs = gimple_assign_lhs (stmt);
2342 tree_code code = gimple_assign_rhs_code (stmt);
2343 unsigned int nops = gimple_num_ops (stmt);
2344 internal_fn cond_fn = get_conditional_internal_fn (code);
2346 /* Construct the arguments to the conditional internal function. */
2347 auto_vec<tree, 8> args;
2348 args.safe_grow (nops + 1, true);
2349 args[0] = mask;
2350 for (unsigned int i = 1; i < nops; ++i)
2351 args[i] = gimple_op (stmt, i);
2352 args[nops] = NULL_TREE;
2354 /* Look for uses of the result to see whether they are COND_EXPRs that can
2355 be folded into the conditional call. */
2356 imm_use_iterator imm_iter;
2357 gimple *use_stmt;
2358 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2360 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2361 if (new_else && value_available_p (stmt, ssa_names, new_else))
2363 if (!args[nops])
2364 args[nops] = new_else;
2365 if (operand_equal_p (new_else, args[nops], 0))
2367 /* We have:
2369 LHS = IFN_COND (MASK, ..., ELSE);
2370 X = MASK ? LHS : ELSE;
2372 which makes X equivalent to LHS. */
2373 tree use_lhs = gimple_assign_lhs (use_stmt);
2374 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2378 if (!args[nops])
2379 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2380 nops - 1, &args[1]);
2382 /* Create and insert the call. */
2383 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2384 gimple_call_set_lhs (new_stmt, lhs);
2385 gimple_call_set_nothrow (new_stmt, true);
2387 return new_stmt;
2390 /* Predicate each write to memory in LOOP.
2392 This function transforms control flow constructs containing memory
2393 writes of the form:
2395 | for (i = 0; i < N; i++)
2396 | if (cond)
2397 | A[i] = expr;
2399 into the following form that does not contain control flow:
2401 | for (i = 0; i < N; i++)
2402 | A[i] = cond ? expr : A[i];
2404 The original CFG looks like this:
2406 | bb_0
2407 | i = 0
2408 | end_bb_0
2410 | bb_1
2411 | if (i < N) goto bb_5 else goto bb_2
2412 | end_bb_1
2414 | bb_2
2415 | cond = some_computation;
2416 | if (cond) goto bb_3 else goto bb_4
2417 | end_bb_2
2419 | bb_3
2420 | A[i] = expr;
2421 | goto bb_4
2422 | end_bb_3
2424 | bb_4
2425 | goto bb_1
2426 | end_bb_4
2428 insert_gimplified_predicates inserts the computation of the COND
2429 expression at the beginning of the destination basic block:
2431 | bb_0
2432 | i = 0
2433 | end_bb_0
2435 | bb_1
2436 | if (i < N) goto bb_5 else goto bb_2
2437 | end_bb_1
2439 | bb_2
2440 | cond = some_computation;
2441 | if (cond) goto bb_3 else goto bb_4
2442 | end_bb_2
2444 | bb_3
2445 | cond = some_computation;
2446 | A[i] = expr;
2447 | goto bb_4
2448 | end_bb_3
2450 | bb_4
2451 | goto bb_1
2452 | end_bb_4
2454 predicate_statements then predicates the memory write as follows:
2456 | bb_0
2457 | i = 0
2458 | end_bb_0
2460 | bb_1
2461 | if (i < N) goto bb_5 else goto bb_2
2462 | end_bb_1
2464 | bb_2
2465 | if (cond) goto bb_3 else goto bb_4
2466 | end_bb_2
2468 | bb_3
2469 | cond = some_computation;
2470 | A[i] = cond ? expr : A[i];
2471 | goto bb_4
2472 | end_bb_3
2474 | bb_4
2475 | goto bb_1
2476 | end_bb_4
2478 and finally combine_blocks removes the basic block boundaries, making
2479 the loop vectorizable:
2481 | bb_0
2482 | i = 0
2483 | if (i < N) goto bb_5 else goto bb_1
2484 | end_bb_0
2486 | bb_1
2487 | cond = some_computation;
2488 | A[i] = cond ? expr : A[i];
2489 | if (i < N) goto bb_5 else goto bb_4
2490 | end_bb_1
2492 | bb_4
2493 | goto bb_1
2494 | end_bb_4
2497 static void
2498 predicate_statements (loop_p loop)
2500 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2501 auto_vec<int, 1> vect_sizes;
2502 auto_vec<tree, 1> vect_masks;
2503 hash_set<tree_ssa_name_hash> ssa_names;
2505 for (i = 1; i < orig_loop_num_nodes; i++)
2507 gimple_stmt_iterator gsi;
2508 basic_block bb = ifc_bbs[i];
2509 tree cond = bb_predicate (bb);
2510 bool swap;
2511 int index;
2513 if (is_true_predicate (cond))
2514 continue;
2516 swap = false;
2517 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2519 swap = true;
2520 cond = TREE_OPERAND (cond, 0);
2523 vect_sizes.truncate (0);
2524 vect_masks.truncate (0);
2526 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2528 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2529 tree lhs;
2530 if (!stmt)
2532 else if (is_false_predicate (cond)
2533 && gimple_vdef (stmt))
2535 unlink_stmt_vdef (stmt);
2536 gsi_remove (&gsi, true);
2537 release_defs (stmt);
2538 continue;
2540 else if (gimple_plf (stmt, GF_PLF_2))
2542 tree lhs = gimple_assign_lhs (stmt);
2543 tree mask;
2544 gimple *new_stmt;
2545 gimple_seq stmts = NULL;
2546 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2547 /* We checked before setting GF_PLF_2 that an equivalent
2548 integer mode exists. */
2549 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2550 if (!vect_sizes.is_empty ()
2551 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2552 /* Use created mask. */
2553 mask = vect_masks[index];
2554 else
2556 if (COMPARISON_CLASS_P (cond))
2557 mask = gimple_build (&stmts, TREE_CODE (cond),
2558 boolean_type_node,
2559 TREE_OPERAND (cond, 0),
2560 TREE_OPERAND (cond, 1));
2561 else
2562 mask = cond;
2564 if (swap)
2566 tree true_val
2567 = constant_boolean_node (true, TREE_TYPE (mask));
2568 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2569 TREE_TYPE (mask), mask, true_val);
2571 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2573 /* Save mask and its size for further use. */
2574 vect_sizes.safe_push (bitsize);
2575 vect_masks.safe_push (mask);
2577 if (gimple_assign_single_p (stmt))
2578 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2579 else
2580 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2582 gsi_replace (&gsi, new_stmt, true);
2584 else if (((lhs = gimple_assign_lhs (stmt)), true)
2585 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2586 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2587 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2588 && arith_code_with_undefined_signed_overflow
2589 (gimple_assign_rhs_code (stmt)))
2591 gsi_remove (&gsi, true);
2592 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2593 bool first = true;
2594 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2595 !gsi_end_p (gsi2);)
2597 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2598 gsi_remove (&gsi2, false);
2599 if (first)
2601 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2602 first = false;
2604 else
2605 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2608 else if (gimple_vdef (stmt))
2610 tree lhs = gimple_assign_lhs (stmt);
2611 tree rhs = gimple_assign_rhs1 (stmt);
2612 tree type = TREE_TYPE (lhs);
2614 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2615 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2616 if (swap)
2617 std::swap (lhs, rhs);
2618 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2619 is_gimple_condexpr, NULL_TREE,
2620 true, GSI_SAME_STMT);
2621 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2622 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2623 update_stmt (stmt);
2625 lhs = gimple_get_lhs (gsi_stmt (gsi));
2626 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2627 ssa_names.add (lhs);
2628 gsi_next (&gsi);
2630 ssa_names.empty ();
2634 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2635 other than the exit and latch of the LOOP. Also reset the
2636 GIMPLE_DEBUG information. */
2638 static void
2639 remove_conditions_and_labels (loop_p loop)
2641 gimple_stmt_iterator gsi;
2642 unsigned int i;
2644 for (i = 0; i < loop->num_nodes; i++)
2646 basic_block bb = ifc_bbs[i];
2648 if (bb_with_exit_edge_p (loop, bb)
2649 || bb == loop->latch)
2650 continue;
2652 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2653 switch (gimple_code (gsi_stmt (gsi)))
2655 case GIMPLE_COND:
2656 case GIMPLE_LABEL:
2657 gsi_remove (&gsi, true);
2658 break;
2660 case GIMPLE_DEBUG:
2661 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2662 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2664 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2665 update_stmt (gsi_stmt (gsi));
2667 gsi_next (&gsi);
2668 break;
2670 default:
2671 gsi_next (&gsi);
2676 /* Combine all the basic blocks from LOOP into one or two super basic
2677 blocks. Replace PHI nodes with conditional modify expressions. */
2679 static void
2680 combine_blocks (class loop *loop)
2682 basic_block bb, exit_bb, merge_target_bb;
2683 unsigned int orig_loop_num_nodes = loop->num_nodes;
2684 unsigned int i;
2685 edge e;
2686 edge_iterator ei;
2688 remove_conditions_and_labels (loop);
2689 insert_gimplified_predicates (loop);
2690 predicate_all_scalar_phis (loop);
2692 if (need_to_predicate || need_to_rewrite_undefined)
2693 predicate_statements (loop);
2695 /* Merge basic blocks. */
2696 exit_bb = NULL;
2697 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2698 for (i = 0; i < orig_loop_num_nodes; i++)
2700 bb = ifc_bbs[i];
2701 predicated[i] = !is_true_predicate (bb_predicate (bb));
2702 free_bb_predicate (bb);
2703 if (bb_with_exit_edge_p (loop, bb))
2705 gcc_assert (exit_bb == NULL);
2706 exit_bb = bb;
2709 gcc_assert (exit_bb != loop->latch);
2711 merge_target_bb = loop->header;
2713 /* Get at the virtual def valid for uses starting at the first block
2714 we merge into the header. Without a virtual PHI the loop has the
2715 same virtual use on all stmts. */
2716 gphi *vphi = get_virtual_phi (loop->header);
2717 tree last_vdef = NULL_TREE;
2718 if (vphi)
2720 last_vdef = gimple_phi_result (vphi);
2721 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2722 ! gsi_end_p (gsi); gsi_next (&gsi))
2723 if (gimple_vdef (gsi_stmt (gsi)))
2724 last_vdef = gimple_vdef (gsi_stmt (gsi));
2726 for (i = 1; i < orig_loop_num_nodes; i++)
2728 gimple_stmt_iterator gsi;
2729 gimple_stmt_iterator last;
2731 bb = ifc_bbs[i];
2733 if (bb == exit_bb || bb == loop->latch)
2734 continue;
2736 /* We release virtual PHIs late because we have to propagate them
2737 out using the current VUSE. The def might be the one used
2738 after the loop. */
2739 vphi = get_virtual_phi (bb);
2740 if (vphi)
2742 /* When there are just loads inside the loop a stray virtual
2743 PHI merging the uses can appear; update last_vdef from
2744 it. */
2745 if (!last_vdef)
2746 last_vdef = gimple_phi_arg_def (vphi, 0);
2747 imm_use_iterator iter;
2748 use_operand_p use_p;
2749 gimple *use_stmt;
2750 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2752 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2753 SET_USE (use_p, last_vdef);
2755 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2756 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2757 gsi = gsi_for_stmt (vphi);
2758 remove_phi_node (&gsi, true);
2761 /* Make the stmts members of loop->header and clear range info from all stmts
2762 in BB, which is now no longer executed conditionally on a predicate we
2763 could have derived it from. */
2764 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2766 gimple *stmt = gsi_stmt (gsi);
2767 gimple_set_bb (stmt, merge_target_bb);
2768 /* Update virtual operands. */
2769 if (last_vdef)
2771 use_operand_p use_p = ssa_vuse_operand (stmt);
2772 if (use_p
2773 && USE_FROM_PTR (use_p) != last_vdef)
2774 SET_USE (use_p, last_vdef);
2775 if (gimple_vdef (stmt))
2776 last_vdef = gimple_vdef (stmt);
2778 else
2779 /* If this is the first load we arrive at, update last_vdef
2780 so we handle stray PHIs correctly. */
2781 last_vdef = gimple_vuse (stmt);
2782 if (predicated[i])
2784 ssa_op_iter i;
2785 tree op;
2786 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2787 reset_flow_sensitive_info (op);
2791 /* Update stmt list. */
2792 last = gsi_last_bb (merge_target_bb);
2793 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2794 set_bb_seq (bb, NULL);
2797 /* Fixup virtual operands in the exit block. */
2798 if (exit_bb
2799 && exit_bb != loop->header)
2801 /* We release virtual PHIs late because we have to propagate them
2802 out using the current VUSE. The def might be the one used
2803 after the loop. */
2804 vphi = get_virtual_phi (exit_bb);
2805 if (vphi)
2807 /* When there are just loads inside the loop a stray virtual
2808 PHI merging the uses can appear; update last_vdef from
2809 it. */
2810 if (!last_vdef)
2811 last_vdef = gimple_phi_arg_def (vphi, 0);
2812 imm_use_iterator iter;
2813 use_operand_p use_p;
2814 gimple *use_stmt;
2815 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2817 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2818 SET_USE (use_p, last_vdef);
2820 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2821 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2822 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2823 remove_phi_node (&gsi, true);
2827 /* Now remove all the edges in the loop, except for those from the exit
2828 block, and delete the blocks we elided. */
2829 for (i = 1; i < orig_loop_num_nodes; i++)
2831 bb = ifc_bbs[i];
2833 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2835 if (e->src == exit_bb)
2836 ei_next (&ei);
2837 else
2838 remove_edge (e);
2841 for (i = 1; i < orig_loop_num_nodes; i++)
2843 bb = ifc_bbs[i];
2845 if (bb == exit_bb || bb == loop->latch)
2846 continue;
2848 delete_basic_block (bb);
2851 /* Re-connect the exit block. */
2852 if (exit_bb != NULL)
2854 if (exit_bb != loop->header)
2856 /* Connect this node to loop header. */
2857 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2858 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2861 /* Redirect non-exit edges to loop->latch. */
2862 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2864 if (!loop_exit_edge_p (loop, e))
2865 redirect_edge_and_branch (e, loop->latch);
2867 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2869 else
2871 /* If the loop does not have an exit, reconnect header and latch. */
2872 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2873 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2876 /* If possible, merge loop header to the block with the exit edge.
2877 This reduces the number of basic blocks to two, to please the
2878 vectorizer that handles only loops with two nodes. */
2879 if (exit_bb
2880 && exit_bb != loop->header)
2882 if (can_merge_blocks_p (loop->header, exit_bb))
2883 merge_blocks (loop->header, exit_bb);
2886 free (ifc_bbs);
2887 ifc_bbs = NULL;
2888 free (predicated);
2891 /* Version LOOP before if-converting it; the original loop
2892 will be if-converted, the new copy of the loop will not,
2893 and the LOOP_VECTORIZED internal call will be guarding which
2894 loop to execute. The vectorizer pass will fold this
2895 internal call into either true or false.
2897 Note that this function intentionally invalidates the profile. Both edges
2898 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2899 consistent after the condition is folded in the vectorizer. */
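/* Schematically (an illustrative sketch; the loop numbers are made up)
   the result of the versioning is

     if (.LOOP_VECTORIZED (1, 2))
       loop1;   <- this copy is if-converted and considered for vectorization
     else
       loop2;   <- unmodified copy, used if vectorization does not happen

   and the vectorizer later folds the internal call to true or false.  */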
2901 static class loop *
2902 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2904 basic_block cond_bb;
2905 tree cond = make_ssa_name (boolean_type_node);
2906 class loop *new_loop;
2907 gimple *g;
2908 gimple_stmt_iterator gsi;
2909 unsigned int save_length;
2911 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2912 build_int_cst (integer_type_node, loop->num),
2913 integer_zero_node);
2914 gimple_call_set_lhs (g, cond);
2916 /* Save BB->aux around loop_version as that uses the same field. */
2917 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2918 void **saved_preds = XALLOCAVEC (void *, save_length);
2919 for (unsigned i = 0; i < save_length; i++)
2920 saved_preds[i] = ifc_bbs[i]->aux;
2922 initialize_original_copy_tables ();
2923 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
2924 is re-merged in the vectorizer. */
2925 new_loop = loop_version (loop, cond, &cond_bb,
2926 profile_probability::always (),
2927 profile_probability::always (),
2928 profile_probability::always (),
2929 profile_probability::always (), true);
2930 free_original_copy_tables ();
2932 for (unsigned i = 0; i < save_length; i++)
2933 ifc_bbs[i]->aux = saved_preds[i];
2935 if (new_loop == NULL)
2936 return NULL;
2938 new_loop->dont_vectorize = true;
2939 new_loop->force_vectorize = false;
2940 gsi = gsi_last_bb (cond_bb);
2941 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2942 if (preds)
2943 preds->safe_push (g);
2944 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2945 update_ssa (TODO_update_ssa);
2946 return new_loop;
2949 /* Return true when LOOP satisfies the following conditions that will
2950 allow it to be recognized by the vectorizer for outer-loop
2951 vectorization:
2952 - The loop is not the root node of the loop tree.
2953 - The loop has exactly one inner loop.
2954 - The loop has a single exit.
2955 - The loop header has a single successor, which is the inner
2956 loop header.
2957 - Each of the inner and outer loop latches have a single
2958 predecessor.
2959 - The loop exit block has a single predecessor, which is the
2960 inner loop's exit block. */
2962 static bool
2963 versionable_outer_loop_p (class loop *loop)
2965 if (!loop_outer (loop)
2966 || loop->dont_vectorize
2967 || !loop->inner
2968 || loop->inner->next
2969 || !single_exit (loop)
2970 || !single_succ_p (loop->header)
2971 || single_succ (loop->header) != loop->inner->header
2972 || !single_pred_p (loop->latch)
2973 || !single_pred_p (loop->inner->latch))
2974 return false;
2976 basic_block outer_exit = single_pred (loop->latch);
2977 basic_block inner_exit = single_pred (loop->inner->latch);
2979 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2980 return false;
2982 if (dump_file)
2983 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2985 return true;
2988 /* Performs splitting of critical edges. Skip splitting and return false
2989 if LOOP will not be converted because:
2991 - LOOP is not well formed.
2992 - LOOP has a PHI with more than MAX_PHI_ARG_NUM arguments.
2994 The last restriction applies only if AGGRESSIVE_IF_CONV is false. */
2996 static bool
2997 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
2999 basic_block *body;
3000 basic_block bb;
3001 unsigned int num = loop->num_nodes;
3002 unsigned int i;
3003 gimple *stmt;
3004 edge e;
3005 edge_iterator ei;
3006 auto_vec<edge> critical_edges;
3008 /* Loop is not well formed. */
3009 if (num <= 2 || loop->inner || !single_exit (loop))
3010 return false;
3012 body = get_loop_body (loop);
3013 for (i = 0; i < num; i++)
3015 bb = body[i];
3016 if (!aggressive_if_conv
3017 && phi_nodes (bb)
3018 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3020 if (dump_file && (dump_flags & TDF_DETAILS))
3021 fprintf (dump_file,
3022 "BB %d has complicated PHI with more than %u args.\n",
3023 bb->index, MAX_PHI_ARG_NUM);
3025 free (body);
3026 return false;
3028 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3029 continue;
3031 stmt = last_stmt (bb);
3032 /* Skip basic blocks not ending with conditional branch. */
3033 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3034 continue;
3036 FOR_EACH_EDGE (e, ei, bb->succs)
3037 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3038 critical_edges.safe_push (e);
3040 free (body);
3042 while (critical_edges.length () > 0)
3044 e = critical_edges.pop ();
3045 /* Don't split if bb can be predicated along non-critical edge. */
3046 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3047 split_edge (e);
3050 return true;
3053 /* Delete redundant statements produced by predication that would otherwise
3054 prevent loop vectorization. */
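/* This is a simple mark-and-sweep DCE restricted to the single
   remaining body block: PHIs, loads, stores, calls, conditions and
   values used outside the block are marked live, liveness is
   propagated through SSA operands, unmarked statements are removed,
   and stores that dse_classify_store proves dead are deleted too.  */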
3056 static void
3057 ifcvt_local_dce (class loop *loop)
3059 gimple *stmt;
3060 gimple *stmt1;
3061 gimple *phi;
3062 gimple_stmt_iterator gsi;
3063 auto_vec<gimple *> worklist;
3064 enum gimple_code code;
3065 use_operand_p use_p;
3066 imm_use_iterator imm_iter;
3068 /* The loop has a single BB only. */
3069 basic_block bb = loop->header;
3070 tree latch_vdef = NULL_TREE;
3072 worklist.create (64);
3073 /* Consider all PHIs as live statements. */
3074 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3076 phi = gsi_stmt (gsi);
3077 gimple_set_plf (phi, GF_PLF_2, true);
3078 worklist.safe_push (phi);
3079 if (virtual_operand_p (gimple_phi_result (phi)))
3080 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3082 /* Consider load/store statements, CALL and COND as live. */
3083 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3085 stmt = gsi_stmt (gsi);
3086 if (is_gimple_debug (stmt))
3088 gimple_set_plf (stmt, GF_PLF_2, true);
3089 continue;
3091 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3093 gimple_set_plf (stmt, GF_PLF_2, true);
3094 worklist.safe_push (stmt);
3095 continue;
3097 code = gimple_code (stmt);
3098 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3100 gimple_set_plf (stmt, GF_PLF_2, true);
3101 worklist.safe_push (stmt);
3102 continue;
3104 gimple_set_plf (stmt, GF_PLF_2, false);
3106 if (code == GIMPLE_ASSIGN)
3108 tree lhs = gimple_assign_lhs (stmt);
3109 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3111 stmt1 = USE_STMT (use_p);
3112 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3114 gimple_set_plf (stmt, GF_PLF_2, true);
3115 worklist.safe_push (stmt);
3116 break;
3121 /* Propagate liveness through arguments of live stmt. */
3122 while (worklist.length () > 0)
3124 ssa_op_iter iter;
3125 use_operand_p use_p;
3126 tree use;
3128 stmt = worklist.pop ();
3129 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3131 use = USE_FROM_PTR (use_p);
3132 if (TREE_CODE (use) != SSA_NAME)
3133 continue;
3134 stmt1 = SSA_NAME_DEF_STMT (use);
3135 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3136 continue;
3137 gimple_set_plf (stmt1, GF_PLF_2, true);
3138 worklist.safe_push (stmt1);
3141 /* Delete dead statements. */
3142 gsi = gsi_last_bb (bb);
3143 while (!gsi_end_p (gsi))
3145 gimple_stmt_iterator gsiprev = gsi;
3146 gsi_prev (&gsiprev);
3147 stmt = gsi_stmt (gsi);
3148 if (gimple_store_p (stmt))
3150 tree lhs = gimple_get_lhs (stmt);
3151 ao_ref write;
3152 ao_ref_init (&write, lhs);
3154 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3155 == DSE_STORE_DEAD)
3156 delete_dead_or_redundant_assignment (&gsi, "dead");
3157 gsi = gsiprev;
3158 continue;
3161 if (gimple_plf (stmt, GF_PLF_2))
3163 gsi = gsiprev;
3164 continue;
3166 if (dump_file && (dump_flags & TDF_DETAILS))
3168 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3169 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3171 gsi_remove (&gsi, true);
3172 release_defs (stmt);
3173 gsi = gsiprev;
3177 /* Return true if VALUE is already available on edge PE. */
3179 static bool
3180 ifcvt_available_on_edge_p (edge pe, tree value)
3182 if (is_gimple_min_invariant (value))
3183 return true;
3185 if (TREE_CODE (value) == SSA_NAME)
3187 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3188 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3189 return true;
3192 return false;
3195 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3196 edge PE. */
3198 static bool
3199 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3201 if (auto *call = dyn_cast<gcall *> (stmt))
3203 if (gimple_call_internal_p (call)
3204 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3205 return false;
3207 else if (auto *assign = dyn_cast<gassign *> (stmt))
3209 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3210 return false;
3212 else
3213 return false;
3215 if (gimple_has_side_effects (stmt)
3216 || gimple_could_trap_p (stmt)
3217 || stmt_could_throw_p (cfun, stmt)
3218 || gimple_vdef (stmt)
3219 || gimple_vuse (stmt))
3220 return false;
3222 int num_args = gimple_num_args (stmt);
3223 if (pe != loop_preheader_edge (loop))
3225 for (int i = 0; i < num_args; ++i)
3226 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3227 return false;
3229 else
3231 for (int i = 0; i < num_args; ++i)
3232 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3233 return false;
3236 return true;
3239 /* Hoist invariant statements from LOOP to edge PE. */
3241 static void
3242 ifcvt_hoist_invariants (class loop *loop, edge pe)
3244 gimple_stmt_iterator hoist_gsi = {};
3245 unsigned int num_blocks = loop->num_nodes;
3246 basic_block *body = get_loop_body (loop);
3247 for (unsigned int i = 0; i < num_blocks; ++i)
3248 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3250 gimple *stmt = gsi_stmt (gsi);
3251 if (ifcvt_can_hoist (loop, pe, stmt))
3253 /* Once we've hoisted one statement, insert other statements
3254 after it. */
3255 gsi_remove (&gsi, false);
3256 if (hoist_gsi.ptr)
3257 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3258 else
3260 gsi_insert_on_edge_immediate (pe, stmt);
3261 hoist_gsi = gsi_for_stmt (stmt);
3263 continue;
3265 gsi_next (&gsi);
3267 free (body);
3270 /* If-convert LOOP when it is legal. For the moment this pass has no
3271 profitability analysis. Returns non-zero todo flags when something
3272 changed. */
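/* The overall flow is: split critical edges, check that LOOP is
   if-convertible, version the loop (or a versionable outer loop) under
   an IFN_LOOP_VECTORIZED guard when required, combine all basic blocks
   while predicating statements, and finally clean up with value
   numbering, local dead code elimination and hoisting of invariant
   statements to the preheader (or to the block dominating the
   versioning check).  */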
3274 unsigned int
3275 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3277 unsigned int todo = 0;
3278 bool aggressive_if_conv;
3279 class loop *rloop;
3280 bitmap exit_bbs;
3281 edge pe;
3283 again:
3284 rloop = NULL;
3285 ifc_bbs = NULL;
3286 need_to_predicate = false;
3287 need_to_rewrite_undefined = false;
3288 any_complicated_phi = false;
3290 /* Apply more aggressive if-conversion when the loop or its outer loop was
3291 marked with the simd pragma. When that's the case, we try to if-convert
3292 loops containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3293 aggressive_if_conv = loop->force_vectorize;
3294 if (!aggressive_if_conv)
3296 class loop *outer_loop = loop_outer (loop);
3297 if (outer_loop && outer_loop->force_vectorize)
3298 aggressive_if_conv = true;
3301 if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
3302 goto cleanup;
3304 if (!if_convertible_loop_p (loop)
3305 || !dbg_cnt (if_conversion_tree))
3306 goto cleanup;
3308 if ((need_to_predicate || any_complicated_phi)
3309 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3310 || loop->dont_vectorize))
3311 goto cleanup;
3313 /* The edge to insert invariant stmts on. */
3314 pe = loop_preheader_edge (loop);
3316 /* Since we have no cost model, always version loops unless the user
3317 specified -ftree-loop-if-convert or unless versioning is required.
3318 Either version this loop, or if the pattern is right for outer-loop
3319 vectorization, version the outer loop. In the latter case we will
3320 still if-convert the original inner loop. */
3321 if (need_to_predicate
3322 || any_complicated_phi
3323 || flag_tree_loop_if_convert != 1)
3325 class loop *vloop
3326 = (versionable_outer_loop_p (loop_outer (loop))
3327 ? loop_outer (loop) : loop);
3328 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3329 if (nloop == NULL)
3330 goto cleanup;
3331 if (vloop != loop)
3333 /* If versionable_outer_loop_p decided to version the
3334 outer loop, version also the inner loop of the non-vectorized
3335 loop copy. So we transform:
3336 loop1
3337 loop2
3338 into:
3339 if (LOOP_VECTORIZED (1, 3))
3341 loop1
3342 loop2
3344 else
3345 loop3 (copy of loop1)
3346 if (LOOP_VECTORIZED (4, 5))
3347 loop4 (copy of loop2)
3348 else
3349 loop5 (copy of loop4) */
3350 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3351 rloop = nloop->inner;
3353 else
3354 /* If we versioned the loop then make sure to insert invariant
3355 stmts before the .LOOP_VECTORIZED check since the vectorizer
3356 will re-use that for things like runtime alias versioning
3357 whose condition can end up using those invariants. */
3358 pe = single_pred_edge (gimple_bb (preds->last ()));
3361 /* Now all statements are if-convertible. Combine all the basic
3362 blocks into one huge basic block doing the if-conversion
3363 on-the-fly. */
3364 combine_blocks (loop);
3366 /* Perform local CSE; this especially helps the vectorizer analysis if loads
3367 and stores are involved. CSE only the loop body, not the entry
3368 PHIs, those are to be kept in sync with the non-if-converted copy.
3369 ??? We'll still keep dead stores though. */
3370 exit_bbs = BITMAP_ALLOC (NULL);
3371 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3372 bitmap_set_bit (exit_bbs, loop->latch->index);
3374 std::pair <tree, tree> *name_pair;
3375 unsigned ssa_names_idx;
3376 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3377 replace_uses_by (name_pair->first, name_pair->second);
3378 redundant_ssa_names.release ();
3380 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3382 /* Delete dead predicate computations. */
3383 ifcvt_local_dce (loop);
3384 BITMAP_FREE (exit_bbs);
3386 ifcvt_hoist_invariants (loop, pe);
3388 todo |= TODO_cleanup_cfg;
3390 cleanup:
3391 if (ifc_bbs)
3393 unsigned int i;
3395 for (i = 0; i < loop->num_nodes; i++)
3396 free_bb_predicate (ifc_bbs[i]);
3398 free (ifc_bbs);
3399 ifc_bbs = NULL;
3401 if (rloop != NULL)
3403 loop = rloop;
3404 goto again;
3407 return todo;
3410 /* Tree if-conversion pass management. */
3412 namespace {
3414 const pass_data pass_data_if_conversion =
3416 GIMPLE_PASS, /* type */
3417 "ifcvt", /* name */
3418 OPTGROUP_NONE, /* optinfo_flags */
3419 TV_TREE_LOOP_IFCVT, /* tv_id */
3420 ( PROP_cfg | PROP_ssa ), /* properties_required */
3421 0, /* properties_provided */
3422 0, /* properties_destroyed */
3423 0, /* todo_flags_start */
3424 0, /* todo_flags_finish */
3427 class pass_if_conversion : public gimple_opt_pass
3429 public:
3430 pass_if_conversion (gcc::context *ctxt)
3431 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3434 /* opt_pass methods: */
3435 virtual bool gate (function *);
3436 virtual unsigned int execute (function *);
3438 }; // class pass_if_conversion
3440 bool
3441 pass_if_conversion::gate (function *fun)
3443 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3444 && flag_tree_loop_if_convert != 0)
3445 || flag_tree_loop_if_convert == 1);
3448 unsigned int
3449 pass_if_conversion::execute (function *fun)
3451 unsigned todo = 0;
3453 if (number_of_loops (fun) <= 1)
3454 return 0;
3456 auto_vec<gimple *> preds;
3457 for (auto loop : loops_list (cfun, 0))
3458 if (flag_tree_loop_if_convert == 1
3459 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3460 && !loop->dont_vectorize))
3461 todo |= tree_if_conversion (loop, &preds);
3463 if (todo)
3465 free_numbers_of_iterations_estimates (fun);
3466 scev_reset ();
3469 if (flag_checking)
3471 basic_block bb;
3472 FOR_EACH_BB_FN (bb, fun)
3473 gcc_assert (!bb->aux);
3476 /* Perform IL update now, it might elide some loops. */
3477 if (todo & TODO_cleanup_cfg)
3479 cleanup_tree_cfg ();
3480 if (need_ssa_update_p (fun))
3481 todo |= TODO_update_ssa;
3483 if (todo & TODO_update_ssa_any)
3484 update_ssa (todo & TODO_update_ssa_any);
3486 /* If if-conversion elided the loop, fall back to the original one. */
3487 for (unsigned i = 0; i < preds.length (); ++i)
3489 gimple *g = preds[i];
3490 if (!gimple_bb (g))
3491 continue;
3492 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3493 if (!get_loop (fun, ifcvt_loop))
3495 if (dump_file)
3496 fprintf (dump_file, "If-converted loop vanished\n");
3497 fold_loop_internal_call (g, boolean_false_node);
3501 return 0;
3504 } // anon namespace
3506 gimple_opt_pass *
3507 make_pass_if_conversion (gcc::context *ctxt)
3509 return new pass_if_conversion (ctxt);