Daily bump.
[official-gcc.git] / gcc / tree-if-conv.c
blobb165dc0c17f44ac0333bfd97c6b6f0362ea6a32e
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2021 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "cfganal.h"
118 #include "internal-fn.h"
119 #include "fold-const.h"
120 #include "tree-ssa-sccvn.h"
121 #include "tree-cfgcleanup.h"
122 #include "tree-ssa-dse.h"
124 /* Only handle PHIs with no more arguments unless we are asked to by
125 simd pragma. */
126 #define MAX_PHI_ARG_NUM \
127 ((unsigned) param_max_tree_if_conversion_phi_args)
129 /* True if we've converted a statement that was only executed when some
130 condition C was true, and if for correctness we need to predicate the
131 statement to ensure that it is a no-op when C is false. See
132 predicate_statements for the kinds of predication we support. */
133 static bool need_to_predicate;
135 /* True if we have to rewrite stmts that may invoke undefined behavior
136 when a condition C was false so it doesn't if it is always executed.
137 See predicate_statements for the kinds of predication we support. */
138 static bool need_to_rewrite_undefined;
140 /* Indicate if there are any complicated PHIs that need to be handled in
141 if-conversion. Complicated PHI has more than two arguments and can't
142 be degenerated to two arguments PHI. See more information in comment
143 before phi_convertible_by_degenerating_args. */
144 static bool any_complicated_phi;
146 /* Hash for struct innermost_loop_behavior. It depends on the user to
147 free the memory. */
149 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
151 static inline hashval_t hash (const value_type &);
152 static inline bool equal (const value_type &,
153 const compare_type &);
156 inline hashval_t
157 innermost_loop_behavior_hash::hash (const value_type &e)
159 hashval_t hash;
161 hash = iterative_hash_expr (e->base_address, 0);
162 hash = iterative_hash_expr (e->offset, hash);
163 hash = iterative_hash_expr (e->init, hash);
164 return iterative_hash_expr (e->step, hash);
167 inline bool
168 innermost_loop_behavior_hash::equal (const value_type &e1,
169 const compare_type &e2)
171 if ((e1->base_address && !e2->base_address)
172 || (!e1->base_address && e2->base_address)
173 || (!e1->offset && e2->offset)
174 || (e1->offset && !e2->offset)
175 || (!e1->init && e2->init)
176 || (e1->init && !e2->init)
177 || (!e1->step && e2->step)
178 || (e1->step && !e2->step))
179 return false;
181 if (e1->base_address && e2->base_address
182 && !operand_equal_p (e1->base_address, e2->base_address, 0))
183 return false;
184 if (e1->offset && e2->offset
185 && !operand_equal_p (e1->offset, e2->offset, 0))
186 return false;
187 if (e1->init && e2->init
188 && !operand_equal_p (e1->init, e2->init, 0))
189 return false;
190 if (e1->step && e2->step
191 && !operand_equal_p (e1->step, e2->step, 0))
192 return false;
194 return true;
197 /* List of basic blocks in if-conversion-suitable order. */
198 static basic_block *ifc_bbs;
200 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
201 static hash_map<innermost_loop_behavior_hash,
202 data_reference_p> *innermost_DR_map;
204 /* Hash table to store <base reference, DR> pairs. */
205 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
207 /* List of redundant SSA names: the first should be replaced by the second. */
208 static vec< std::pair<tree, tree> > redundant_ssa_names;
210 /* Structure used to predicate basic blocks. This is attached to the
211 ->aux field of the BBs in the loop to be if-converted. */
212 struct bb_predicate {
214 /* The condition under which this basic block is executed. */
215 tree predicate;
217 /* PREDICATE is gimplified, and the sequence of statements is
218 recorded here, in order to avoid the duplication of computations
219 that occur in previous conditions. See PR44483. */
220 gimple_seq predicate_gimplified_stmts;
223 /* Returns true when the basic block BB has a predicate. */
225 static inline bool
226 bb_has_predicate (basic_block bb)
228 return bb->aux != NULL;
231 /* Returns the gimplified predicate for basic block BB. */
233 static inline tree
234 bb_predicate (basic_block bb)
236 return ((struct bb_predicate *) bb->aux)->predicate;
239 /* Sets the gimplified predicate COND for basic block BB. */
241 static inline void
242 set_bb_predicate (basic_block bb, tree cond)
244 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
245 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
246 || is_gimple_condexpr (cond));
247 ((struct bb_predicate *) bb->aux)->predicate = cond;
250 /* Returns the sequence of statements of the gimplification of the
251 predicate for basic block BB. */
253 static inline gimple_seq
254 bb_predicate_gimplified_stmts (basic_block bb)
256 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
259 /* Sets the sequence of statements STMTS of the gimplification of the
260 predicate for basic block BB. */
262 static inline void
263 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
265 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
268 /* Adds the sequence of statements STMTS to the sequence of statements
269 of the predicate for basic block BB. */
271 static inline void
272 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
274 /* We might have updated some stmts in STMTS via force_gimple_operand
275 calling fold_stmt and that producing multiple stmts. Delink immediate
276 uses so update_ssa after loop versioning doesn't get confused for
277 the not yet inserted predicates.
278 ??? This should go away once we reliably avoid updating stmts
279 not in any BB. */
280 for (gimple_stmt_iterator gsi = gsi_start (stmts);
281 !gsi_end_p (gsi); gsi_next (&gsi))
283 gimple *stmt = gsi_stmt (gsi);
284 delink_stmt_imm_use (stmt);
285 gimple_set_modified (stmt, true);
287 gimple_seq_add_seq_without_update
288 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
291 /* Initializes to TRUE the predicate of basic block BB. */
293 static inline void
294 init_bb_predicate (basic_block bb)
296 bb->aux = XNEW (struct bb_predicate);
297 set_bb_predicate_gimplified_stmts (bb, NULL);
298 set_bb_predicate (bb, boolean_true_node);
301 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
303 static inline void
304 release_bb_predicate (basic_block bb)
306 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
307 if (stmts)
309 /* Ensure that these stmts haven't yet been added to a bb. */
310 if (flag_checking)
311 for (gimple_stmt_iterator i = gsi_start (stmts);
312 !gsi_end_p (i); gsi_next (&i))
313 gcc_assert (! gimple_bb (gsi_stmt (i)));
315 /* Discard them. */
316 gimple_seq_discard (stmts);
317 set_bb_predicate_gimplified_stmts (bb, NULL);
321 /* Free the predicate of basic block BB. */
323 static inline void
324 free_bb_predicate (basic_block bb)
326 if (!bb_has_predicate (bb))
327 return;
329 release_bb_predicate (bb);
330 free (bb->aux);
331 bb->aux = NULL;
334 /* Reinitialize predicate of BB with the true predicate. */
336 static inline void
337 reset_bb_predicate (basic_block bb)
339 if (!bb_has_predicate (bb))
340 init_bb_predicate (bb);
341 else
343 release_bb_predicate (bb);
344 set_bb_predicate (bb, boolean_true_node);
348 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
349 the expression EXPR. Inserts the statement created for this
350 computation before GSI and leaves the iterator GSI at the same
351 statement. */
353 static tree
354 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
356 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
357 gimple *stmt = gimple_build_assign (new_name, expr);
358 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
359 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
360 return new_name;
363 /* Return true when COND is a false predicate. */
365 static inline bool
366 is_false_predicate (tree cond)
368 return (cond != NULL_TREE
369 && (cond == boolean_false_node
370 || integer_zerop (cond)));
373 /* Return true when COND is a true predicate. */
375 static inline bool
376 is_true_predicate (tree cond)
378 return (cond == NULL_TREE
379 || cond == boolean_true_node
380 || integer_onep (cond));
383 /* Returns true when BB has a predicate that is not trivial: true or
384 NULL_TREE. */
386 static inline bool
387 is_predicated (basic_block bb)
389 return !is_true_predicate (bb_predicate (bb));
392 /* Parses the predicate COND and returns its comparison code and
393 operands OP0 and OP1. */
395 static enum tree_code
396 parse_predicate (tree cond, tree *op0, tree *op1)
398 gimple *s;
400 if (TREE_CODE (cond) == SSA_NAME
401 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
403 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
405 *op0 = gimple_assign_rhs1 (s);
406 *op1 = gimple_assign_rhs2 (s);
407 return gimple_assign_rhs_code (s);
410 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
412 tree op = gimple_assign_rhs1 (s);
413 tree type = TREE_TYPE (op);
414 enum tree_code code = parse_predicate (op, op0, op1);
416 return code == ERROR_MARK ? ERROR_MARK
417 : invert_tree_comparison (code, HONOR_NANS (type));
420 return ERROR_MARK;
423 if (COMPARISON_CLASS_P (cond))
425 *op0 = TREE_OPERAND (cond, 0);
426 *op1 = TREE_OPERAND (cond, 1);
427 return TREE_CODE (cond);
430 return ERROR_MARK;
433 /* Returns the fold of predicate C1 OR C2 at location LOC. */
435 static tree
436 fold_or_predicates (location_t loc, tree c1, tree c2)
438 tree op1a, op1b, op2a, op2b;
439 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
440 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
442 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
444 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
445 code2, op2a, op2b);
446 if (t)
447 return t;
450 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
453 /* Returns either a COND_EXPR or the folded expression if the folded
454 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
455 a constant or a SSA_NAME. */
457 static tree
458 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
460 tree rhs1, lhs1, cond_expr;
462 /* If COND is comparison r != 0 and r has boolean type, convert COND
463 to SSA_NAME to accept by vect bool pattern. */
464 if (TREE_CODE (cond) == NE_EXPR)
466 tree op0 = TREE_OPERAND (cond, 0);
467 tree op1 = TREE_OPERAND (cond, 1);
468 if (TREE_CODE (op0) == SSA_NAME
469 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
470 && (integer_zerop (op1)))
471 cond = op0;
473 cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
475 if (cond_expr == NULL_TREE)
476 return build3 (COND_EXPR, type, cond, rhs, lhs);
478 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
480 if (is_gimple_val (cond_expr))
481 return cond_expr;
483 if (TREE_CODE (cond_expr) == ABS_EXPR)
485 rhs1 = TREE_OPERAND (cond_expr, 1);
486 STRIP_USELESS_TYPE_CONVERSION (rhs1);
487 if (is_gimple_val (rhs1))
488 return build1 (ABS_EXPR, type, rhs1);
491 if (TREE_CODE (cond_expr) == MIN_EXPR
492 || TREE_CODE (cond_expr) == MAX_EXPR)
494 lhs1 = TREE_OPERAND (cond_expr, 0);
495 STRIP_USELESS_TYPE_CONVERSION (lhs1);
496 rhs1 = TREE_OPERAND (cond_expr, 1);
497 STRIP_USELESS_TYPE_CONVERSION (rhs1);
498 if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
499 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
501 return build3 (COND_EXPR, type, cond, rhs, lhs);
504 /* Add condition NC to the predicate list of basic block BB. LOOP is
505 the loop to be if-converted. Use predicate of cd-equivalent block
506 for join bb if it exists: we call basic blocks bb1 and bb2
507 cd-equivalent if they are executed under the same condition. */
509 static inline void
510 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
512 tree bc, *tp;
513 basic_block dom_bb;
515 if (is_true_predicate (nc))
516 return;
518 /* If dominance tells us this basic block is always executed,
519 don't record any predicates for it. */
520 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
521 return;
523 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
524 /* We use notion of cd equivalence to get simpler predicate for
525 join block, e.g. if join block has 2 predecessors with predicates
526 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
527 p1 & p2 | p1 & !p2. */
528 if (dom_bb != loop->header
529 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
531 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
532 bc = bb_predicate (dom_bb);
533 if (!is_true_predicate (bc))
534 set_bb_predicate (bb, bc);
535 else
536 gcc_assert (is_true_predicate (bb_predicate (bb)));
537 if (dump_file && (dump_flags & TDF_DETAILS))
538 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
539 dom_bb->index, bb->index);
540 return;
543 if (!is_predicated (bb))
544 bc = nc;
545 else
547 bc = bb_predicate (bb);
548 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
549 if (is_true_predicate (bc))
551 reset_bb_predicate (bb);
552 return;
556 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
557 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
558 tp = &TREE_OPERAND (bc, 0);
559 else
560 tp = &bc;
561 if (!is_gimple_condexpr (*tp))
563 gimple_seq stmts;
564 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
565 add_bb_predicate_gimplified_stmts (bb, stmts);
567 set_bb_predicate (bb, bc);
570 /* Add the condition COND to the previous condition PREV_COND, and add
571 this to the predicate list of the destination of edge E. LOOP is
572 the loop to be if-converted. */
574 static void
575 add_to_dst_predicate_list (class loop *loop, edge e,
576 tree prev_cond, tree cond)
578 if (!flow_bb_inside_loop_p (loop, e->dest))
579 return;
581 if (!is_true_predicate (prev_cond))
582 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
583 prev_cond, cond);
585 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
586 add_to_predicate_list (loop, e->dest, cond);
589 /* Return true if one of the successor edges of BB exits LOOP. */
591 static bool
592 bb_with_exit_edge_p (class loop *loop, basic_block bb)
594 edge e;
595 edge_iterator ei;
597 FOR_EACH_EDGE (e, ei, bb->succs)
598 if (loop_exit_edge_p (loop, e))
599 return true;
601 return false;
604 /* Given PHI which has more than two arguments, this function checks if
605 it's if-convertible by degenerating its arguments. Specifically, if
606 below two conditions are satisfied:
608 1) Number of PHI arguments with different values equals to 2 and one
609 argument has the only occurrence.
610 2) The edge corresponding to the unique argument isn't critical edge.
612 Such PHI can be handled as PHIs have only two arguments. For example,
613 below PHI:
615 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
617 can be transformed into:
619 res = (predicate of e3) ? A_2 : A_1;
621 Return TRUE if it is the case, FALSE otherwise. */
623 static bool
624 phi_convertible_by_degenerating_args (gphi *phi)
626 edge e;
627 tree arg, t1 = NULL, t2 = NULL;
628 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
629 unsigned int num_args = gimple_phi_num_args (phi);
631 gcc_assert (num_args > 2);
633 for (i = 0; i < num_args; i++)
635 arg = gimple_phi_arg_def (phi, i);
636 if (t1 == NULL || operand_equal_p (t1, arg, 0))
638 n1++;
639 i1 = i;
640 t1 = arg;
642 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
644 n2++;
645 i2 = i;
646 t2 = arg;
648 else
649 return false;
652 if (n1 != 1 && n2 != 1)
653 return false;
655 /* Check if the edge corresponding to the unique arg is critical. */
656 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
657 if (EDGE_COUNT (e->src->succs) > 1)
658 return false;
660 return true;
663 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
664 and it belongs to basic block BB. Note at this point, it is sure
665 that PHI is if-convertible. This function updates global variable
666 ANY_COMPLICATED_PHI if PHI is complicated. */
668 static bool
669 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
671 if (dump_file && (dump_flags & TDF_DETAILS))
673 fprintf (dump_file, "-------------------------\n");
674 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
677 if (bb != loop->header
678 && gimple_phi_num_args (phi) > 2
679 && !phi_convertible_by_degenerating_args (phi))
680 any_complicated_phi = true;
682 return true;
685 /* Records the status of a data reference. This struct is attached to
686 each DR->aux field. */
688 struct ifc_dr {
689 bool rw_unconditionally;
690 bool w_unconditionally;
691 bool written_at_least_once;
693 tree rw_predicate;
694 tree w_predicate;
695 tree base_w_predicate;
698 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
699 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
700 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
701 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
703 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
704 HASH tables. While storing them in HASH table, it checks if the
705 reference is unconditionally read or written and stores that as a flag
706 information. For base reference it checks if it is written atlest once
707 unconditionally and stores it as flag information along with DR.
708 In other words for every data reference A in STMT there exist other
709 accesses to a data reference with the same base with predicates that
710 add up (OR-up) to the true predicate: this ensures that the data
711 reference A is touched (read or written) on every iteration of the
712 if-converted loop. */
713 static void
714 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
717 data_reference_p *master_dr, *base_master_dr;
718 tree base_ref = DR_BASE_OBJECT (a);
719 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
720 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
721 bool exist1, exist2;
723 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
724 if (!exist1)
725 *master_dr = a;
727 if (DR_IS_WRITE (a))
729 IFC_DR (*master_dr)->w_predicate
730 = fold_or_predicates (UNKNOWN_LOCATION, ca,
731 IFC_DR (*master_dr)->w_predicate);
732 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
733 DR_W_UNCONDITIONALLY (*master_dr) = true;
735 IFC_DR (*master_dr)->rw_predicate
736 = fold_or_predicates (UNKNOWN_LOCATION, ca,
737 IFC_DR (*master_dr)->rw_predicate);
738 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
739 DR_RW_UNCONDITIONALLY (*master_dr) = true;
741 if (DR_IS_WRITE (a))
743 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
744 if (!exist2)
745 *base_master_dr = a;
746 IFC_DR (*base_master_dr)->base_w_predicate
747 = fold_or_predicates (UNKNOWN_LOCATION, ca,
748 IFC_DR (*base_master_dr)->base_w_predicate);
749 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
750 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
754 /* Return TRUE if can prove the index IDX of an array reference REF is
755 within array bound. Return false otherwise. */
757 static bool
758 idx_within_array_bound (tree ref, tree *idx, void *dta)
760 wi::overflow_type overflow;
761 widest_int niter, valid_niter, delta, wi_step;
762 tree ev, init, step;
763 tree low, high;
764 class loop *loop = (class loop*) dta;
766 /* Only support within-bound access for array references. */
767 if (TREE_CODE (ref) != ARRAY_REF)
768 return false;
770 /* For arrays at the end of the structure, we are not guaranteed that they
771 do not really extend over their declared size. However, for arrays of
772 size greater than one, this is unlikely to be intended. */
773 if (array_at_struct_end_p (ref))
774 return false;
776 ev = analyze_scalar_evolution (loop, *idx);
777 ev = instantiate_parameters (loop, ev);
778 init = initial_condition (ev);
779 step = evolution_part_in_loop_num (ev, loop->num);
781 if (!init || TREE_CODE (init) != INTEGER_CST
782 || (step && TREE_CODE (step) != INTEGER_CST))
783 return false;
785 low = array_ref_low_bound (ref);
786 high = array_ref_up_bound (ref);
788 /* The case of nonconstant bounds could be handled, but it would be
789 complicated. */
790 if (TREE_CODE (low) != INTEGER_CST
791 || !high || TREE_CODE (high) != INTEGER_CST)
792 return false;
794 /* Check if the intial idx is within bound. */
795 if (wi::to_widest (init) < wi::to_widest (low)
796 || wi::to_widest (init) > wi::to_widest (high))
797 return false;
799 /* The idx is always within bound. */
800 if (!step || integer_zerop (step))
801 return true;
803 if (!max_loop_iterations (loop, &niter))
804 return false;
806 if (wi::to_widest (step) < 0)
808 delta = wi::to_widest (init) - wi::to_widest (low);
809 wi_step = -wi::to_widest (step);
811 else
813 delta = wi::to_widest (high) - wi::to_widest (init);
814 wi_step = wi::to_widest (step);
817 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
818 /* The iteration space of idx is within array bound. */
819 if (!overflow && niter <= valid_niter)
820 return true;
822 return false;
825 /* Return TRUE if ref is a within bound array reference. */
827 static bool
828 ref_within_array_bound (gimple *stmt, tree ref)
830 class loop *loop = loop_containing_stmt (stmt);
832 gcc_assert (loop != NULL);
833 return for_each_index (&ref, idx_within_array_bound, loop);
837 /* Given a memory reference expression T, return TRUE if base object
838 it refers to is writable. The base object of a memory reference
839 is the main object being referenced, which is returned by function
840 get_base_address. */
842 static bool
843 base_object_writable (tree ref)
845 tree base_tree = get_base_address (ref);
847 return (base_tree
848 && DECL_P (base_tree)
849 && decl_binds_to_current_def_p (base_tree)
850 && !TREE_READONLY (base_tree));
853 /* Return true when the memory references of STMT won't trap in the
854 if-converted code. There are two things that we have to check for:
856 - writes to memory occur to writable memory: if-conversion of
857 memory writes transforms the conditional memory writes into
858 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
859 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
860 be executed at all in the original code, it may be a readonly
861 memory. To check that A is not const-qualified, we check that
862 there exists at least an unconditional write to A in the current
863 function.
865 - reads or writes to memory are valid memory accesses for every
866 iteration. To check that the memory accesses are correctly formed
867 and that we are allowed to read and write in these locations, we
868 check that the memory accesses to be if-converted occur at every
869 iteration unconditionally.
871 Returns true for the memory reference in STMT, same memory reference
872 is read or written unconditionally atleast once and the base memory
873 reference is written unconditionally once. This is to check reference
874 will not write fault. Also retuns true if the memory reference is
875 unconditionally read once then we are conditionally writing to memory
876 which is defined as read and write and is bound to the definition
877 we are seeing. */
878 static bool
879 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
881 /* If DR didn't see a reference here we can't use it to tell
882 whether the ref traps or not. */
883 if (gimple_uid (stmt) == 0)
884 return false;
886 data_reference_p *master_dr, *base_master_dr;
887 data_reference_p a = drs[gimple_uid (stmt) - 1];
889 tree base = DR_BASE_OBJECT (a);
890 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
892 gcc_assert (DR_STMT (a) == stmt);
893 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
894 || DR_INIT (a) || DR_STEP (a));
896 master_dr = innermost_DR_map->get (innermost);
897 gcc_assert (master_dr != NULL);
899 base_master_dr = baseref_DR_map->get (base);
901 /* If a is unconditionally written to it doesn't trap. */
902 if (DR_W_UNCONDITIONALLY (*master_dr))
903 return true;
905 /* If a is unconditionally accessed then ...
907 Even a is conditional access, we can treat it as an unconditional
908 one if it's an array reference and all its index are within array
909 bound. */
910 if (DR_RW_UNCONDITIONALLY (*master_dr)
911 || ref_within_array_bound (stmt, DR_REF (a)))
913 /* an unconditional read won't trap. */
914 if (DR_IS_READ (a))
915 return true;
917 /* an unconditionaly write won't trap if the base is written
918 to unconditionally. */
919 if (base_master_dr
920 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
921 return flag_store_data_races;
922 /* or the base is known to be not readonly. */
923 else if (base_object_writable (DR_REF (a)))
924 return flag_store_data_races;
927 return false;
930 /* Return true if STMT could be converted into a masked load or store
931 (conditional load or store based on a mask computed from bb predicate). */
933 static bool
934 ifcvt_can_use_mask_load_store (gimple *stmt)
936 /* Check whether this is a load or store. */
937 tree lhs = gimple_assign_lhs (stmt);
938 bool is_load;
939 tree ref;
940 if (gimple_store_p (stmt))
942 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
943 return false;
944 is_load = false;
945 ref = lhs;
947 else if (gimple_assign_load_p (stmt))
949 is_load = true;
950 ref = gimple_assign_rhs1 (stmt);
952 else
953 return false;
955 if (may_be_nonaddressable_p (ref))
956 return false;
958 /* Mask should be integer mode of the same size as the load/store
959 mode. */
960 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
961 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
962 return false;
964 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
965 return true;
967 return false;
970 /* Return true if STMT could be converted from an operation that is
971 unconditional to one that is conditional on a bb predicate mask. */
973 static bool
974 ifcvt_can_predicate (gimple *stmt)
976 basic_block bb = gimple_bb (stmt);
978 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
979 || bb->loop_father->dont_vectorize
980 || gimple_has_volatile_ops (stmt))
981 return false;
983 if (gimple_assign_single_p (stmt))
984 return ifcvt_can_use_mask_load_store (stmt);
986 tree_code code = gimple_assign_rhs_code (stmt);
987 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
988 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
989 if (!types_compatible_p (lhs_type, rhs_type))
990 return false;
991 internal_fn cond_fn = get_conditional_internal_fn (code);
992 return (cond_fn != IFN_LAST
993 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
996 /* Return true when STMT is if-convertible.
998 GIMPLE_ASSIGN statement is not if-convertible if,
999 - it is not movable,
1000 - it could trap,
1001 - LHS is not var decl. */
1003 static bool
1004 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1005 vec<data_reference_p> refs)
1007 tree lhs = gimple_assign_lhs (stmt);
1009 if (dump_file && (dump_flags & TDF_DETAILS))
1011 fprintf (dump_file, "-------------------------\n");
1012 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1015 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1016 return false;
1018 /* Some of these constrains might be too conservative. */
1019 if (stmt_ends_bb_p (stmt)
1020 || gimple_has_volatile_ops (stmt)
1021 || (TREE_CODE (lhs) == SSA_NAME
1022 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1023 || gimple_has_side_effects (stmt))
1025 if (dump_file && (dump_flags & TDF_DETAILS))
1026 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1027 return false;
1030 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
1031 in between if_convertible_loop_p and combine_blocks
1032 we can perform loop versioning. */
1033 gimple_set_plf (stmt, GF_PLF_2, false);
1035 if ((! gimple_vuse (stmt)
1036 || gimple_could_trap_p_1 (stmt, false, false)
1037 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1038 && gimple_could_trap_p (stmt))
1040 if (ifcvt_can_predicate (stmt))
1042 gimple_set_plf (stmt, GF_PLF_2, true);
1043 need_to_predicate = true;
1044 return true;
1046 if (dump_file && (dump_flags & TDF_DETAILS))
1047 fprintf (dump_file, "tree could trap...\n");
1048 return false;
1050 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1051 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1052 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1053 && arith_code_with_undefined_signed_overflow
1054 (gimple_assign_rhs_code (stmt)))
1055 /* We have to rewrite stmts with undefined overflow. */
1056 need_to_rewrite_undefined = true;
1058 /* When if-converting stores force versioning, likewise if we
1059 ended up generating store data races. */
1060 if (gimple_vdef (stmt))
1061 need_to_predicate = true;
1063 return true;
1066 /* Return true when STMT is if-convertible.
1068 A statement is if-convertible if:
1069 - it is an if-convertible GIMPLE_ASSIGN,
1070 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1071 - it is builtins call. */
1073 static bool
1074 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1076 switch (gimple_code (stmt))
1078 case GIMPLE_LABEL:
1079 case GIMPLE_DEBUG:
1080 case GIMPLE_COND:
1081 return true;
1083 case GIMPLE_ASSIGN:
1084 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1086 case GIMPLE_CALL:
1088 tree fndecl = gimple_call_fndecl (stmt);
1089 if (fndecl)
1091 int flags = gimple_call_flags (stmt);
1092 if ((flags & ECF_CONST)
1093 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1094 /* We can only vectorize some builtins at the moment,
1095 so restrict if-conversion to those. */
1096 && fndecl_built_in_p (fndecl))
1097 return true;
1099 return false;
1102 default:
1103 /* Don't know what to do with 'em so don't do anything. */
1104 if (dump_file && (dump_flags & TDF_DETAILS))
1106 fprintf (dump_file, "don't know what to do\n");
1107 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1109 return false;
1112 return true;
1115 /* Assumes that BB has more than 1 predecessors.
1116 Returns false if at least one successor is not on critical edge
1117 and true otherwise. */
1119 static inline bool
1120 all_preds_critical_p (basic_block bb)
1122 edge e;
1123 edge_iterator ei;
1125 FOR_EACH_EDGE (e, ei, bb->preds)
1126 if (EDGE_COUNT (e->src->succs) == 1)
1127 return false;
1128 return true;
1131 /* Return true when BB is if-convertible. This routine does not check
1132 basic block's statements and phis.
1134 A basic block is not if-convertible if:
1135 - it is non-empty and it is after the exit block (in BFS order),
1136 - it is after the exit block but before the latch,
1137 - its edges are not normal.
1139 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1140 inside LOOP. */
1142 static bool
1143 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1145 edge e;
1146 edge_iterator ei;
1148 if (dump_file && (dump_flags & TDF_DETAILS))
1149 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1151 if (EDGE_COUNT (bb->succs) > 2)
1152 return false;
1154 gimple *last = last_stmt (bb);
1155 if (gcall *call = safe_dyn_cast <gcall *> (last))
1156 if (gimple_call_ctrl_altering_p (call))
1157 return false;
1159 if (exit_bb)
1161 if (bb != loop->latch)
1163 if (dump_file && (dump_flags & TDF_DETAILS))
1164 fprintf (dump_file, "basic block after exit bb but before latch\n");
1165 return false;
1167 else if (!empty_block_p (bb))
1169 if (dump_file && (dump_flags & TDF_DETAILS))
1170 fprintf (dump_file, "non empty basic block after exit bb\n");
1171 return false;
1173 else if (bb == loop->latch
1174 && bb != exit_bb
1175 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1177 if (dump_file && (dump_flags & TDF_DETAILS))
1178 fprintf (dump_file, "latch is not dominated by exit_block\n");
1179 return false;
1183 /* Be less adventurous and handle only normal edges. */
1184 FOR_EACH_EDGE (e, ei, bb->succs)
1185 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1187 if (dump_file && (dump_flags & TDF_DETAILS))
1188 fprintf (dump_file, "Difficult to handle edges\n");
1189 return false;
1192 return true;
1195 /* Return true when all predecessor blocks of BB are visited. The
1196 VISITED bitmap keeps track of the visited blocks. */
1198 static bool
1199 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1201 edge e;
1202 edge_iterator ei;
1203 FOR_EACH_EDGE (e, ei, bb->preds)
1204 if (!bitmap_bit_p (*visited, e->src->index))
1205 return false;
1207 return true;
1210 /* Get body of a LOOP in suitable order for if-conversion. It is
1211 caller's responsibility to deallocate basic block list.
1212 If-conversion suitable order is, breadth first sort (BFS) order
1213 with an additional constraint: select a block only if all its
1214 predecessors are already selected. */
1216 static basic_block *
1217 get_loop_body_in_if_conv_order (const class loop *loop)
1219 basic_block *blocks, *blocks_in_bfs_order;
1220 basic_block bb;
1221 bitmap visited;
1222 unsigned int index = 0;
1223 unsigned int visited_count = 0;
1225 gcc_assert (loop->num_nodes);
1226 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1228 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1229 visited = BITMAP_ALLOC (NULL);
1231 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1233 index = 0;
1234 while (index < loop->num_nodes)
1236 bb = blocks_in_bfs_order [index];
1238 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1240 free (blocks_in_bfs_order);
1241 BITMAP_FREE (visited);
1242 free (blocks);
1243 return NULL;
1246 if (!bitmap_bit_p (visited, bb->index))
1248 if (pred_blocks_visited_p (bb, &visited)
1249 || bb == loop->header)
1251 /* This block is now visited. */
1252 bitmap_set_bit (visited, bb->index);
1253 blocks[visited_count++] = bb;
1257 index++;
1259 if (index == loop->num_nodes
1260 && visited_count != loop->num_nodes)
1261 /* Not done yet. */
1262 index = 0;
1264 free (blocks_in_bfs_order);
1265 BITMAP_FREE (visited);
1266 return blocks;
1269 /* Returns true when the analysis of the predicates for all the basic
1270 blocks in LOOP succeeded.
1272 predicate_bbs first allocates the predicates of the basic blocks.
1273 These fields are then initialized with the tree expressions
1274 representing the predicates under which a basic block is executed
1275 in the LOOP. As the loop->header is executed at each iteration, it
1276 has the "true" predicate. Other statements executed under a
1277 condition are predicated with that condition, for example
1279 | if (x)
1280 | S1;
1281 | else
1282 | S2;
1284 S1 will be predicated with "x", and
1285 S2 will be predicated with "!x". */
1287 static void
1288 predicate_bbs (loop_p loop)
1290 unsigned int i;
1292 for (i = 0; i < loop->num_nodes; i++)
1293 init_bb_predicate (ifc_bbs[i]);
1295 for (i = 0; i < loop->num_nodes; i++)
1297 basic_block bb = ifc_bbs[i];
1298 tree cond;
1299 gimple *stmt;
1301 /* The loop latch and loop exit block are always executed and
1302 have no extra conditions to be processed: skip them. */
1303 if (bb == loop->latch
1304 || bb_with_exit_edge_p (loop, bb))
1306 reset_bb_predicate (bb);
1307 continue;
1310 cond = bb_predicate (bb);
1311 stmt = last_stmt (bb);
1312 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1314 tree c2;
1315 edge true_edge, false_edge;
1316 location_t loc = gimple_location (stmt);
1317 tree c = build2_loc (loc, gimple_cond_code (stmt),
1318 boolean_type_node,
1319 gimple_cond_lhs (stmt),
1320 gimple_cond_rhs (stmt));
1322 /* Add new condition into destination's predicate list. */
1323 extract_true_false_edges_from_block (gimple_bb (stmt),
1324 &true_edge, &false_edge);
1326 /* If C is true, then TRUE_EDGE is taken. */
1327 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1328 unshare_expr (c));
1330 /* If C is false, then FALSE_EDGE is taken. */
1331 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1332 unshare_expr (c));
1333 add_to_dst_predicate_list (loop, false_edge,
1334 unshare_expr (cond), c2);
1336 cond = NULL_TREE;
1339 /* If current bb has only one successor, then consider it as an
1340 unconditional goto. */
1341 if (single_succ_p (bb))
1343 basic_block bb_n = single_succ (bb);
1345 /* The successor bb inherits the predicate of its
1346 predecessor. If there is no predicate in the predecessor
1347 bb, then consider the successor bb as always executed. */
1348 if (cond == NULL_TREE)
1349 cond = boolean_true_node;
1351 add_to_predicate_list (loop, bb_n, cond);
1355 /* The loop header is always executed. */
1356 reset_bb_predicate (loop->header);
1357 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1358 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1361 /* Build region by adding loop pre-header and post-header blocks. */
1363 static vec<basic_block>
1364 build_region (class loop *loop)
1366 vec<basic_block> region = vNULL;
1367 basic_block exit_bb = NULL;
1369 gcc_assert (ifc_bbs);
1370 /* The first element is loop pre-header. */
1371 region.safe_push (loop_preheader_edge (loop)->src);
1373 for (unsigned int i = 0; i < loop->num_nodes; i++)
1375 basic_block bb = ifc_bbs[i];
1376 region.safe_push (bb);
1377 /* Find loop postheader. */
1378 edge e;
1379 edge_iterator ei;
1380 FOR_EACH_EDGE (e, ei, bb->succs)
1381 if (loop_exit_edge_p (loop, e))
1383 exit_bb = e->dest;
1384 break;
1387 /* The last element is loop post-header. */
1388 gcc_assert (exit_bb);
1389 region.safe_push (exit_bb);
1390 return region;
1393 /* Return true when LOOP is if-convertible. This is a helper function
1394 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1395 in if_convertible_loop_p. */
1397 static bool
1398 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1400 unsigned int i;
1401 basic_block exit_bb = NULL;
1402 vec<basic_block> region;
1404 if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1405 return false;
1407 calculate_dominance_info (CDI_DOMINATORS);
1409 /* Allow statements that can be handled during if-conversion. */
1410 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1411 if (!ifc_bbs)
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file, "Irreducible loop\n");
1415 return false;
1418 for (i = 0; i < loop->num_nodes; i++)
1420 basic_block bb = ifc_bbs[i];
1422 if (!if_convertible_bb_p (loop, bb, exit_bb))
1423 return false;
1425 if (bb_with_exit_edge_p (loop, bb))
1426 exit_bb = bb;
1429 for (i = 0; i < loop->num_nodes; i++)
1431 basic_block bb = ifc_bbs[i];
1432 gimple_stmt_iterator gsi;
1434 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1435 switch (gimple_code (gsi_stmt (gsi)))
1437 case GIMPLE_LABEL:
1438 case GIMPLE_ASSIGN:
1439 case GIMPLE_CALL:
1440 case GIMPLE_DEBUG:
1441 case GIMPLE_COND:
1442 gimple_set_uid (gsi_stmt (gsi), 0);
1443 break;
1444 default:
1445 return false;
1449 data_reference_p dr;
1451 innermost_DR_map
1452 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1453 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1455 /* Compute post-dominator tree locally. */
1456 region = build_region (loop);
1457 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1459 predicate_bbs (loop);
1461 /* Free post-dominator tree since it is not used after predication. */
1462 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1463 region.release ();
1465 for (i = 0; refs->iterate (i, &dr); i++)
1467 tree ref = DR_REF (dr);
1469 dr->aux = XNEW (struct ifc_dr);
1470 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1471 DR_RW_UNCONDITIONALLY (dr) = false;
1472 DR_W_UNCONDITIONALLY (dr) = false;
1473 IFC_DR (dr)->rw_predicate = boolean_false_node;
1474 IFC_DR (dr)->w_predicate = boolean_false_node;
1475 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1476 if (gimple_uid (DR_STMT (dr)) == 0)
1477 gimple_set_uid (DR_STMT (dr), i + 1);
1479 /* If DR doesn't have innermost loop behavior or it's a compound
1480 memory reference, we synthesize its innermost loop behavior
1481 for hashing. */
1482 if (TREE_CODE (ref) == COMPONENT_REF
1483 || TREE_CODE (ref) == IMAGPART_EXPR
1484 || TREE_CODE (ref) == REALPART_EXPR
1485 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1486 || DR_INIT (dr) || DR_STEP (dr)))
1488 while (TREE_CODE (ref) == COMPONENT_REF
1489 || TREE_CODE (ref) == IMAGPART_EXPR
1490 || TREE_CODE (ref) == REALPART_EXPR)
1491 ref = TREE_OPERAND (ref, 0);
1493 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1494 DR_BASE_ADDRESS (dr) = ref;
1496 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1499 for (i = 0; i < loop->num_nodes; i++)
1501 basic_block bb = ifc_bbs[i];
1502 gimple_stmt_iterator itr;
1504 /* Check the if-convertibility of statements in predicated BBs. */
1505 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1506 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1507 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1508 return false;
1511 /* Checking PHIs needs to be done after stmts, as the fact whether there
1512 are any masked loads or stores affects the tests. */
1513 for (i = 0; i < loop->num_nodes; i++)
1515 basic_block bb = ifc_bbs[i];
1516 gphi_iterator itr;
1518 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1519 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1520 return false;
1523 if (dump_file)
1524 fprintf (dump_file, "Applying if-conversion\n");
1526 return true;
1529 /* Return true when LOOP is if-convertible.
1530 LOOP is if-convertible if:
1531 - it is innermost,
1532 - it has two or more basic blocks,
1533 - it has only one exit,
1534 - loop header is not the exit edge,
1535 - if its basic blocks and phi nodes are if convertible. */
1537 static bool
1538 if_convertible_loop_p (class loop *loop)
1540 edge e;
1541 edge_iterator ei;
1542 bool res = false;
1543 vec<data_reference_p> refs;
1545 /* Handle only innermost loop. */
1546 if (!loop || loop->inner)
1548 if (dump_file && (dump_flags & TDF_DETAILS))
1549 fprintf (dump_file, "not innermost loop\n");
1550 return false;
1553 /* If only one block, no need for if-conversion. */
1554 if (loop->num_nodes <= 2)
1556 if (dump_file && (dump_flags & TDF_DETAILS))
1557 fprintf (dump_file, "less than 2 basic blocks\n");
1558 return false;
1561 /* More than one loop exit is too much to handle. */
1562 if (!single_exit (loop))
1564 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, "multiple exits\n");
1566 return false;
1569 /* If one of the loop header's edge is an exit edge then do not
1570 apply if-conversion. */
1571 FOR_EACH_EDGE (e, ei, loop->header->succs)
1572 if (loop_exit_edge_p (loop, e))
1573 return false;
1575 refs.create (5);
1576 res = if_convertible_loop_p_1 (loop, &refs);
1578 data_reference_p dr;
1579 unsigned int i;
1580 for (i = 0; refs.iterate (i, &dr); i++)
1581 free (dr->aux);
1583 free_data_refs (refs);
1585 delete innermost_DR_map;
1586 innermost_DR_map = NULL;
1588 delete baseref_DR_map;
1589 baseref_DR_map = NULL;
1591 return res;
1594 /* Return reduc_1 if has_nop.
1596 if (...)
1597 tmp1 = (unsigned type) reduc_1;
1598 tmp2 = tmp1 + rhs2;
1599 reduc_3 = (signed type) tmp2. */
1600 static tree
1601 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1603 if (!has_nop)
1604 return op;
1606 if (TREE_CODE (op) != SSA_NAME)
1607 return NULL_TREE;
1609 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1610 if (!stmt
1611 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1612 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1613 (gimple_assign_rhs1 (stmt))))
1614 return NULL_TREE;
1616 return gimple_assign_rhs1 (stmt);
1619 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1620 which is in predicated basic block.
1621 In fact, the following PHI pattern is searching:
1622 loop-header:
1623 reduc_1 = PHI <..., reduc_2>
1625 if (...)
1626 reduc_3 = ...
1627 reduc_2 = PHI <reduc_1, reduc_3>
1629 ARG_0 and ARG_1 are correspondent PHI arguments.
1630 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1631 EXTENDED is true if PHI has > 2 arguments. */
1633 static bool
1634 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1635 tree *op0, tree *op1, bool extended, bool* has_nop,
1636 gimple **nop_reduc)
1638 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1639 gimple *stmt;
1640 gimple *header_phi = NULL;
1641 enum tree_code reduction_op;
1642 basic_block bb = gimple_bb (phi);
1643 class loop *loop = bb->loop_father;
1644 edge latch_e = loop_latch_edge (loop);
1645 imm_use_iterator imm_iter;
1646 use_operand_p use_p;
1647 edge e;
1648 edge_iterator ei;
1649 bool result = *has_nop = false;
1650 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1651 return false;
1653 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1655 lhs = arg_1;
1656 header_phi = SSA_NAME_DEF_STMT (arg_0);
1657 stmt = SSA_NAME_DEF_STMT (arg_1);
1659 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1661 lhs = arg_0;
1662 header_phi = SSA_NAME_DEF_STMT (arg_1);
1663 stmt = SSA_NAME_DEF_STMT (arg_0);
1665 else
1666 return false;
1667 if (gimple_bb (header_phi) != loop->header)
1668 return false;
1670 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1671 return false;
1673 if (gimple_code (stmt) != GIMPLE_ASSIGN
1674 || gimple_has_volatile_ops (stmt))
1675 return false;
1677 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1678 return false;
1680 if (!is_predicated (gimple_bb (stmt)))
1681 return false;
1683 /* Check that stmt-block is predecessor of phi-block. */
1684 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1685 if (e->dest == bb)
1687 result = true;
1688 break;
1690 if (!result)
1691 return false;
1693 if (!has_single_use (lhs))
1694 return false;
1696 reduction_op = gimple_assign_rhs_code (stmt);
1698 /* Catch something like below
1700 loop-header:
1701 reduc_1 = PHI <..., reduc_2>
1703 if (...)
1704 tmp1 = (unsigned type) reduc_1;
1705 tmp2 = tmp1 + rhs2;
1706 reduc_3 = (signed type) tmp2;
1708 reduc_2 = PHI <reduc_1, reduc_3>
1710 and convert to
1712 reduc_2 = PHI <0, reduc_3>
1713 tmp1 = (unsigned type)reduce_1;
1714 ifcvt = cond_expr ? rhs2 : 0
1715 tmp2 = tmp1 +/- ifcvt;
1716 reduce_1 = (signed type)tmp2; */
1718 if (CONVERT_EXPR_CODE_P (reduction_op))
1720 lhs = gimple_assign_rhs1 (stmt);
1721 if (TREE_CODE (lhs) != SSA_NAME
1722 || !has_single_use (lhs))
1723 return false;
1725 *nop_reduc = stmt;
1726 stmt = SSA_NAME_DEF_STMT (lhs);
1727 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1728 || !is_gimple_assign (stmt))
1729 return false;
1731 *has_nop = true;
1732 reduction_op = gimple_assign_rhs_code (stmt);
1735 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1736 return false;
1737 r_op1 = gimple_assign_rhs1 (stmt);
1738 r_op2 = gimple_assign_rhs2 (stmt);
1740 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1741 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1743 /* Make R_OP1 to hold reduction variable. */
1744 if (r_nop2 == PHI_RESULT (header_phi)
1745 && reduction_op == PLUS_EXPR)
1747 std::swap (r_op1, r_op2);
1748 std::swap (r_nop1, r_nop2);
1750 else if (r_nop1 != PHI_RESULT (header_phi))
1751 return false;
1753 if (*has_nop)
1755 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1756 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1758 gimple *use_stmt = USE_STMT (use_p);
1759 if (is_gimple_debug (use_stmt))
1760 continue;
1761 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1762 continue;
1763 if (use_stmt != phi)
1764 return false;
1768 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1769 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1771 gimple *use_stmt = USE_STMT (use_p);
1772 if (is_gimple_debug (use_stmt))
1773 continue;
1774 if (use_stmt == stmt)
1775 continue;
1776 if (gimple_code (use_stmt) != GIMPLE_PHI)
1777 return false;
1780 *op0 = r_op1; *op1 = r_op2;
1781 *reduc = stmt;
1782 return true;
1785 /* Converts conditional scalar reduction into unconditional form, e.g.
1786 bb_4
1787 if (_5 != 0) goto bb_5 else goto bb_6
1788 end_bb_4
1789 bb_5
1790 res_6 = res_13 + 1;
1791 end_bb_5
1792 bb_6
1793 # res_2 = PHI <res_13(4), res_6(5)>
1794 end_bb_6
1796 will be converted into sequence
1797 _ifc__1 = _5 != 0 ? 1 : 0;
1798 res_2 = res_13 + _ifc__1;
1799 Argument SWAP tells that arguments of conditional expression should be
1800 swapped.
1801 Returns rhs of resulting PHI assignment. */
1803 static tree
1804 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1805 tree cond, tree op0, tree op1, bool swap,
1806 bool has_nop, gimple* nop_reduc)
1808 gimple_stmt_iterator stmt_it;
1809 gimple *new_assign;
1810 tree rhs;
1811 tree rhs1 = gimple_assign_rhs1 (reduc);
1812 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1813 tree c;
1814 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1815 gimple_seq stmts = NULL;
1817 if (dump_file && (dump_flags & TDF_DETAILS))
1819 fprintf (dump_file, "Found cond scalar reduction.\n");
1820 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1823 /* Build cond expression using COND and constant operand
1824 of reduction rhs. */
1825 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1826 unshare_expr (cond),
1827 swap ? zero : op1,
1828 swap ? op1 : zero);
1830 /* Create assignment stmt and insert it at GSI. */
1831 new_assign = gimple_build_assign (tmp, c);
1832 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1833 /* Build rhs for unconditional increment/decrement. */
1834 rhs = gimple_build (&stmts, gimple_assign_rhs_code (reduc),
1835 TREE_TYPE (rhs1), op0, tmp);
1837 if (has_nop)
1839 rhs = gimple_convert (&stmts,
1840 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1841 stmt_it = gsi_for_stmt (nop_reduc);
1842 gsi_remove (&stmt_it, true);
1843 release_defs (nop_reduc);
1845 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1847 /* Delete original reduction stmt. */
1848 stmt_it = gsi_for_stmt (reduc);
1849 gsi_remove (&stmt_it, true);
1850 release_defs (reduc);
1851 return rhs;
1854 /* Produce condition for all occurrences of ARG in PHI node. */
1856 static tree
1857 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1858 gimple_stmt_iterator *gsi)
1860 int len;
1861 int i;
1862 tree cond = NULL_TREE;
1863 tree c;
1864 edge e;
1866 len = occur->length ();
1867 gcc_assert (len > 0);
1868 for (i = 0; i < len; i++)
1870 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1871 c = bb_predicate (e->src);
1872 if (is_true_predicate (c))
1874 cond = c;
1875 break;
1877 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1878 is_gimple_condexpr, NULL_TREE,
1879 true, GSI_SAME_STMT);
1880 if (cond != NULL_TREE)
1882 /* Must build OR expression. */
1883 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1884 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1885 is_gimple_condexpr, NULL_TREE,
1886 true, GSI_SAME_STMT);
1888 else
1889 cond = c;
1891 gcc_assert (cond != NULL_TREE);
1892 return cond;
1895 /* Local valueization callback that follows all-use SSA edges. */
1897 static tree
1898 ifcvt_follow_ssa_use_edges (tree val)
1900 return val;
1903 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1904 This routine can handle PHI nodes with more than two arguments.
1906 For example,
1907 S1: A = PHI <x1(1), x2(5)>
1908 is converted into,
1909 S2: A = cond ? x1 : x2;
1911 The generated code is inserted at GSI that points to the top of
1912 basic block's statement list.
1913 If PHI node has more than two arguments a chain of conditional
1914 expression is produced. */
1917 static void
1918 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1920 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1921 tree rhs, res, arg0, arg1, op0, op1, scev;
1922 tree cond;
1923 unsigned int index0;
1924 unsigned int max, args_len;
1925 edge e;
1926 basic_block bb;
1927 unsigned int i;
1928 bool has_nop;
1930 res = gimple_phi_result (phi);
1931 if (virtual_operand_p (res))
1932 return;
1934 if ((rhs = degenerate_phi_result (phi))
1935 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1936 res))
1937 && !chrec_contains_undetermined (scev)
1938 && scev != res
1939 && (rhs = gimple_phi_arg_def (phi, 0))))
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1943 fprintf (dump_file, "Degenerate phi!\n");
1944 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1946 new_stmt = gimple_build_assign (res, rhs);
1947 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1948 update_stmt (new_stmt);
1949 return;
1952 bb = gimple_bb (phi);
1953 if (EDGE_COUNT (bb->preds) == 2)
1955 /* Predicate ordinary PHI node with 2 arguments. */
1956 edge first_edge, second_edge;
1957 basic_block true_bb;
1958 first_edge = EDGE_PRED (bb, 0);
1959 second_edge = EDGE_PRED (bb, 1);
1960 cond = bb_predicate (first_edge->src);
1961 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1962 std::swap (first_edge, second_edge);
1963 if (EDGE_COUNT (first_edge->src->succs) > 1)
1965 cond = bb_predicate (second_edge->src);
1966 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1967 cond = TREE_OPERAND (cond, 0);
1968 else
1969 first_edge = second_edge;
1971 else
1972 cond = bb_predicate (first_edge->src);
1973 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1974 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1975 is_gimple_condexpr, NULL_TREE,
1976 true, GSI_SAME_STMT);
1977 true_bb = first_edge->src;
1978 if (EDGE_PRED (bb, 1)->src == true_bb)
1980 arg0 = gimple_phi_arg_def (phi, 1);
1981 arg1 = gimple_phi_arg_def (phi, 0);
1983 else
1985 arg0 = gimple_phi_arg_def (phi, 0);
1986 arg1 = gimple_phi_arg_def (phi, 1);
1988 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1989 &op0, &op1, false, &has_nop,
1990 &nop_reduc))
1992 /* Convert reduction stmt into vectorizable form. */
1993 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1994 true_bb != gimple_bb (reduc),
1995 has_nop, nop_reduc);
1996 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
1998 else
1999 /* Build new RHS using selected condition and arguments. */
2000 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2001 arg0, arg1);
2002 new_stmt = gimple_build_assign (res, rhs);
2003 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2004 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2005 if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
2007 new_stmt = gsi_stmt (new_gsi);
2008 update_stmt (new_stmt);
2011 if (dump_file && (dump_flags & TDF_DETAILS))
2013 fprintf (dump_file, "new phi replacement stmt\n");
2014 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2016 return;
2019 /* Create hashmap for PHI node which contain vector of argument indexes
2020 having the same value. */
2021 bool swap = false;
2022 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2023 unsigned int num_args = gimple_phi_num_args (phi);
2024 int max_ind = -1;
2025 /* Vector of different PHI argument values. */
2026 auto_vec<tree> args (num_args);
2028 /* Compute phi_arg_map. */
2029 for (i = 0; i < num_args; i++)
2031 tree arg;
2033 arg = gimple_phi_arg_def (phi, i);
2034 if (!phi_arg_map.get (arg))
2035 args.quick_push (arg);
2036 phi_arg_map.get_or_insert (arg).safe_push (i);
2039 /* Determine element with max number of occurrences. */
2040 max_ind = -1;
2041 max = 1;
2042 args_len = args.length ();
2043 for (i = 0; i < args_len; i++)
2045 unsigned int len;
2046 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2048 max_ind = (int) i;
2049 max = len;
2053 /* Put element with max number of occurences to the end of ARGS. */
2054 if (max_ind != -1 && max_ind +1 != (int) args_len)
2055 std::swap (args[args_len - 1], args[max_ind]);
2057 /* Handle one special case when number of arguments with different values
2058 is equal 2 and one argument has the only occurrence. Such PHI can be
2059 handled as if would have only 2 arguments. */
2060 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2062 vec<int> *indexes;
2063 indexes = phi_arg_map.get (args[0]);
2064 index0 = (*indexes)[0];
2065 arg0 = args[0];
2066 arg1 = args[1];
2067 e = gimple_phi_arg_edge (phi, index0);
2068 cond = bb_predicate (e->src);
2069 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2071 swap = true;
2072 cond = TREE_OPERAND (cond, 0);
2074 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2075 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
2076 is_gimple_condexpr, NULL_TREE,
2077 true, GSI_SAME_STMT);
2078 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2079 &op0, &op1, true, &has_nop, &nop_reduc)))
2080 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2081 swap? arg1 : arg0,
2082 swap? arg0 : arg1);
2083 else
2085 /* Convert reduction stmt into vectorizable form. */
2086 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2087 swap,has_nop, nop_reduc);
2088 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2090 new_stmt = gimple_build_assign (res, rhs);
2091 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2092 update_stmt (new_stmt);
2094 else
2096 /* Common case. */
2097 vec<int> *indexes;
2098 tree type = TREE_TYPE (gimple_phi_result (phi));
2099 tree lhs;
2100 arg1 = args[1];
2101 for (i = 0; i < args_len; i++)
2103 arg0 = args[i];
2104 indexes = phi_arg_map.get (args[i]);
2105 if (i != args_len - 1)
2106 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2107 else
2108 lhs = res;
2109 cond = gen_phi_arg_condition (phi, indexes, gsi);
2110 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2111 arg0, arg1);
2112 new_stmt = gimple_build_assign (lhs, rhs);
2113 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2114 update_stmt (new_stmt);
2115 arg1 = lhs;
2119 if (dump_file && (dump_flags & TDF_DETAILS))
2121 fprintf (dump_file, "new extended phi replacement stmt\n");
2122 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2126 /* Replaces in LOOP all the scalar phi nodes other than those in the
2127 LOOP->header block with conditional modify expressions. */
2129 static void
2130 predicate_all_scalar_phis (class loop *loop)
2132 basic_block bb;
2133 unsigned int orig_loop_num_nodes = loop->num_nodes;
2134 unsigned int i;
2136 for (i = 1; i < orig_loop_num_nodes; i++)
2138 gphi *phi;
2139 gimple_stmt_iterator gsi;
2140 gphi_iterator phi_gsi;
2141 bb = ifc_bbs[i];
2143 if (bb == loop->header)
2144 continue;
2146 phi_gsi = gsi_start_phis (bb);
2147 if (gsi_end_p (phi_gsi))
2148 continue;
2150 gsi = gsi_after_labels (bb);
2151 while (!gsi_end_p (phi_gsi))
2153 phi = phi_gsi.phi ();
2154 if (virtual_operand_p (gimple_phi_result (phi)))
2155 gsi_next (&phi_gsi);
2156 else
2158 predicate_scalar_phi (phi, &gsi);
2159 remove_phi_node (&phi_gsi, false);
2165 /* Insert in each basic block of LOOP the statements produced by the
2166 gimplification of the predicates. */
2168 static void
2169 insert_gimplified_predicates (loop_p loop)
2171 unsigned int i;
2173 for (i = 0; i < loop->num_nodes; i++)
2175 basic_block bb = ifc_bbs[i];
2176 gimple_seq stmts;
2177 if (!is_predicated (bb))
2178 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2179 if (!is_predicated (bb))
2181 /* Do not insert statements for a basic block that is not
2182 predicated. Also make sure that the predicate of the
2183 basic block is set to true. */
2184 reset_bb_predicate (bb);
2185 continue;
2188 stmts = bb_predicate_gimplified_stmts (bb);
2189 if (stmts)
2191 if (need_to_predicate)
2193 /* Insert the predicate of the BB just after the label,
2194 as the if-conversion of memory writes will use this
2195 predicate. */
2196 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2197 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2199 else
2201 /* Insert the predicate of the BB at the end of the BB
2202 as this would reduce the register pressure: the only
2203 use of this predicate will be in successor BBs. */
2204 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2206 if (gsi_end_p (gsi)
2207 || stmt_ends_bb_p (gsi_stmt (gsi)))
2208 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2209 else
2210 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2213 /* Once the sequence is code generated, set it to NULL. */
2214 set_bb_predicate_gimplified_stmts (bb, NULL);
2219 /* Helper function for predicate_statements. Returns index of existent
2220 mask if it was created for given SIZE and -1 otherwise. */
2222 static int
2223 mask_exists (int size, const vec<int> &vec)
2225 unsigned int ix;
2226 int v;
2227 FOR_EACH_VEC_ELT (vec, ix, v)
2228 if (v == size)
2229 return (int) ix;
2230 return -1;
2233 /* Helper function for predicate_statements. STMT is a memory read or
2234 write and it needs to be predicated by MASK. Return a statement
2235 that does so. */
2237 static gimple *
2238 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2240 gcall *new_stmt;
2242 tree lhs = gimple_assign_lhs (stmt);
2243 tree rhs = gimple_assign_rhs1 (stmt);
2244 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2245 mark_addressable (ref);
2246 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2247 true, NULL_TREE, true, GSI_SAME_STMT);
2248 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2249 get_object_alignment (ref));
2250 /* Copy points-to info if possible. */
2251 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2252 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2253 ref);
2254 if (TREE_CODE (lhs) == SSA_NAME)
2256 new_stmt
2257 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2258 ptr, mask);
2259 gimple_call_set_lhs (new_stmt, lhs);
2260 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2262 else
2264 new_stmt
2265 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2266 mask, rhs);
2267 gimple_move_vops (new_stmt, stmt);
2269 gimple_call_set_nothrow (new_stmt, true);
2270 return new_stmt;
2273 /* STMT uses OP_LHS. Check whether it is equivalent to:
2275 ... = OP_MASK ? OP_LHS : X;
2277 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2278 known to have value OP_COND. */
2280 static tree
2281 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2282 tree op_lhs)
2284 gassign *assign = dyn_cast <gassign *> (stmt);
2285 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2286 return NULL_TREE;
2288 tree use_cond = gimple_assign_rhs1 (assign);
2289 tree if_true = gimple_assign_rhs2 (assign);
2290 tree if_false = gimple_assign_rhs3 (assign);
2292 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2293 && if_true == op_lhs)
2294 return if_false;
2296 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2297 return if_true;
2299 return NULL_TREE;
2302 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2303 the set of SSA names defined earlier in STMT's block. */
2305 static bool
2306 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2307 tree value)
2309 if (is_gimple_min_invariant (value))
2310 return true;
2312 if (TREE_CODE (value) == SSA_NAME)
2314 if (SSA_NAME_IS_DEFAULT_DEF (value))
2315 return true;
2317 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2318 basic_block use_bb = gimple_bb (stmt);
2319 return (def_bb == use_bb
2320 ? ssa_names->contains (value)
2321 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2324 return false;
2327 /* Helper function for predicate_statements. STMT is a potentially-trapping
2328 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2329 has value COND. Return a statement that does so. SSA_NAMES is the set of
2330 SSA names defined earlier in STMT's block. */
2332 static gimple *
2333 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2334 hash_set<tree_ssa_name_hash> *ssa_names)
2336 tree lhs = gimple_assign_lhs (stmt);
2337 tree_code code = gimple_assign_rhs_code (stmt);
2338 unsigned int nops = gimple_num_ops (stmt);
2339 internal_fn cond_fn = get_conditional_internal_fn (code);
2341 /* Construct the arguments to the conditional internal function. */
2342 auto_vec<tree, 8> args;
2343 args.safe_grow (nops + 1, true);
2344 args[0] = mask;
2345 for (unsigned int i = 1; i < nops; ++i)
2346 args[i] = gimple_op (stmt, i);
2347 args[nops] = NULL_TREE;
2349 /* Look for uses of the result to see whether they are COND_EXPRs that can
2350 be folded into the conditional call. */
2351 imm_use_iterator imm_iter;
2352 gimple *use_stmt;
2353 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2355 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2356 if (new_else && value_available_p (stmt, ssa_names, new_else))
2358 if (!args[nops])
2359 args[nops] = new_else;
2360 if (operand_equal_p (new_else, args[nops], 0))
2362 /* We have:
2364 LHS = IFN_COND (MASK, ..., ELSE);
2365 X = MASK ? LHS : ELSE;
2367 which makes X equivalent to LHS. */
2368 tree use_lhs = gimple_assign_lhs (use_stmt);
2369 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2373 if (!args[nops])
2374 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2375 nops - 1, &args[1]);
2377 /* Create and insert the call. */
2378 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2379 gimple_call_set_lhs (new_stmt, lhs);
2380 gimple_call_set_nothrow (new_stmt, true);
2382 return new_stmt;
2385 /* Predicate each write to memory in LOOP.
2387 This function transforms control flow constructs containing memory
2388 writes of the form:
2390 | for (i = 0; i < N; i++)
2391 | if (cond)
2392 | A[i] = expr;
2394 into the following form that does not contain control flow:
2396 | for (i = 0; i < N; i++)
2397 | A[i] = cond ? expr : A[i];
2399 The original CFG looks like this:
2401 | bb_0
2402 | i = 0
2403 | end_bb_0
2405 | bb_1
2406 | if (i < N) goto bb_5 else goto bb_2
2407 | end_bb_1
2409 | bb_2
2410 | cond = some_computation;
2411 | if (cond) goto bb_3 else goto bb_4
2412 | end_bb_2
2414 | bb_3
2415 | A[i] = expr;
2416 | goto bb_4
2417 | end_bb_3
2419 | bb_4
2420 | goto bb_1
2421 | end_bb_4
2423 insert_gimplified_predicates inserts the computation of the COND
2424 expression at the beginning of the destination basic block:
2426 | bb_0
2427 | i = 0
2428 | end_bb_0
2430 | bb_1
2431 | if (i < N) goto bb_5 else goto bb_2
2432 | end_bb_1
2434 | bb_2
2435 | cond = some_computation;
2436 | if (cond) goto bb_3 else goto bb_4
2437 | end_bb_2
2439 | bb_3
2440 | cond = some_computation;
2441 | A[i] = expr;
2442 | goto bb_4
2443 | end_bb_3
2445 | bb_4
2446 | goto bb_1
2447 | end_bb_4
2449 predicate_statements is then predicating the memory write as follows:
2451 | bb_0
2452 | i = 0
2453 | end_bb_0
2455 | bb_1
2456 | if (i < N) goto bb_5 else goto bb_2
2457 | end_bb_1
2459 | bb_2
2460 | if (cond) goto bb_3 else goto bb_4
2461 | end_bb_2
2463 | bb_3
2464 | cond = some_computation;
2465 | A[i] = cond ? expr : A[i];
2466 | goto bb_4
2467 | end_bb_3
2469 | bb_4
2470 | goto bb_1
2471 | end_bb_4
2473 and finally combine_blocks removes the basic block boundaries making
2474 the loop vectorizable:
2476 | bb_0
2477 | i = 0
2478 | if (i < N) goto bb_5 else goto bb_1
2479 | end_bb_0
2481 | bb_1
2482 | cond = some_computation;
2483 | A[i] = cond ? expr : A[i];
2484 | if (i < N) goto bb_5 else goto bb_4
2485 | end_bb_1
2487 | bb_4
2488 | goto bb_1
2489 | end_bb_4
2492 static void
2493 predicate_statements (loop_p loop, edge pe)
2495 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2496 auto_vec<int, 1> vect_sizes;
2497 auto_vec<tree, 1> vect_masks;
2498 hash_set<tree_ssa_name_hash> ssa_names;
2500 for (i = 1; i < orig_loop_num_nodes; i++)
2502 gimple_stmt_iterator gsi;
2503 basic_block bb = ifc_bbs[i];
2504 tree cond = bb_predicate (bb);
2505 bool swap;
2506 int index;
2508 if (is_true_predicate (cond))
2509 continue;
2511 swap = false;
2512 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2514 swap = true;
2515 cond = TREE_OPERAND (cond, 0);
2518 vect_sizes.truncate (0);
2519 vect_masks.truncate (0);
2521 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2523 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2524 tree lhs;
2525 if (!stmt)
2527 else if (is_false_predicate (cond)
2528 && gimple_vdef (stmt))
2530 unlink_stmt_vdef (stmt);
2531 gsi_remove (&gsi, true);
2532 release_defs (stmt);
2533 continue;
2535 else if (gimple_plf (stmt, GF_PLF_2))
2537 tree lhs = gimple_assign_lhs (stmt);
2538 tree mask;
2539 gimple *new_stmt;
2540 gimple_seq stmts = NULL;
2541 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2542 /* We checked before setting GF_PLF_2 that an equivalent
2543 integer mode exists. */
2544 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2545 if (!vect_sizes.is_empty ()
2546 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2547 /* Use created mask. */
2548 mask = vect_masks[index];
2549 else
2551 if (COMPARISON_CLASS_P (cond))
2552 mask = gimple_build (&stmts, TREE_CODE (cond),
2553 boolean_type_node,
2554 TREE_OPERAND (cond, 0),
2555 TREE_OPERAND (cond, 1));
2556 else
2557 mask = cond;
2559 if (swap)
2561 tree true_val
2562 = constant_boolean_node (true, TREE_TYPE (mask));
2563 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2564 TREE_TYPE (mask), mask, true_val);
2566 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2568 /* Save mask and its size for further use. */
2569 vect_sizes.safe_push (bitsize);
2570 vect_masks.safe_push (mask);
2572 if (gimple_assign_single_p (stmt))
2573 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2574 else
2575 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2577 gsi_replace (&gsi, new_stmt, true);
2579 else if (((lhs = gimple_assign_lhs (stmt)), true)
2580 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2581 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2582 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2583 && arith_code_with_undefined_signed_overflow
2584 (gimple_assign_rhs_code (stmt)))
2586 gsi_remove (&gsi, true);
2587 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2588 bool first = true;
2589 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2590 !gsi_end_p (gsi2);)
2592 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2593 gsi_remove (&gsi2, false);
2594 /* Make sure to move invariant conversions out of the
2595 loop. */
2596 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2))
2597 && expr_invariant_in_loop_p (loop,
2598 gimple_assign_rhs1 (stmt2)))
2599 gsi_insert_on_edge_immediate (pe, stmt2);
2600 else if (first)
2602 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2603 first = false;
2605 else
2606 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2609 else if (gimple_vdef (stmt))
2611 tree lhs = gimple_assign_lhs (stmt);
2612 tree rhs = gimple_assign_rhs1 (stmt);
2613 tree type = TREE_TYPE (lhs);
2615 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2616 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2617 if (swap)
2618 std::swap (lhs, rhs);
2619 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2620 is_gimple_condexpr, NULL_TREE,
2621 true, GSI_SAME_STMT);
2622 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2623 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2624 update_stmt (stmt);
2626 lhs = gimple_get_lhs (gsi_stmt (gsi));
2627 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2628 ssa_names.add (lhs);
2629 gsi_next (&gsi);
2631 ssa_names.empty ();
2635 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2636 other than the exit and latch of the LOOP. Also resets the
2637 GIMPLE_DEBUG information. */
2639 static void
2640 remove_conditions_and_labels (loop_p loop)
2642 gimple_stmt_iterator gsi;
2643 unsigned int i;
2645 for (i = 0; i < loop->num_nodes; i++)
2647 basic_block bb = ifc_bbs[i];
2649 if (bb_with_exit_edge_p (loop, bb)
2650 || bb == loop->latch)
2651 continue;
2653 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2654 switch (gimple_code (gsi_stmt (gsi)))
2656 case GIMPLE_COND:
2657 case GIMPLE_LABEL:
2658 gsi_remove (&gsi, true);
2659 break;
2661 case GIMPLE_DEBUG:
2662 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2663 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2665 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2666 update_stmt (gsi_stmt (gsi));
2668 gsi_next (&gsi);
2669 break;
2671 default:
2672 gsi_next (&gsi);
2677 /* Combine all the basic blocks from LOOP into one or two super basic
2678 blocks. Replace PHI nodes with conditional modify expressions. */
2680 static void
2681 combine_blocks (class loop *loop, edge pe)
2683 basic_block bb, exit_bb, merge_target_bb;
2684 unsigned int orig_loop_num_nodes = loop->num_nodes;
2685 unsigned int i;
2686 edge e;
2687 edge_iterator ei;
2689 remove_conditions_and_labels (loop);
2690 insert_gimplified_predicates (loop);
2691 predicate_all_scalar_phis (loop);
2693 if (need_to_predicate || need_to_rewrite_undefined)
2694 predicate_statements (loop, pe);
2696 /* Merge basic blocks. */
2697 exit_bb = NULL;
2698 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2699 for (i = 0; i < orig_loop_num_nodes; i++)
2701 bb = ifc_bbs[i];
2702 predicated[i] = !is_true_predicate (bb_predicate (bb));
2703 free_bb_predicate (bb);
2704 if (bb_with_exit_edge_p (loop, bb))
2706 gcc_assert (exit_bb == NULL);
2707 exit_bb = bb;
2710 gcc_assert (exit_bb != loop->latch);
2712 merge_target_bb = loop->header;
2714 /* Get at the virtual def valid for uses starting at the first block
2715 we merge into the header. Without a virtual PHI the loop has the
2716 same virtual use on all stmts. */
2717 gphi *vphi = get_virtual_phi (loop->header);
2718 tree last_vdef = NULL_TREE;
2719 if (vphi)
2721 last_vdef = gimple_phi_result (vphi);
2722 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2723 ! gsi_end_p (gsi); gsi_next (&gsi))
2724 if (gimple_vdef (gsi_stmt (gsi)))
2725 last_vdef = gimple_vdef (gsi_stmt (gsi));
2727 for (i = 1; i < orig_loop_num_nodes; i++)
2729 gimple_stmt_iterator gsi;
2730 gimple_stmt_iterator last;
2732 bb = ifc_bbs[i];
2734 if (bb == exit_bb || bb == loop->latch)
2735 continue;
2737 /* We release virtual PHIs late because we have to propagate them
2738 out using the current VUSE. The def might be the one used
2739 after the loop. */
2740 vphi = get_virtual_phi (bb);
2741 if (vphi)
2743 /* When there's just loads inside the loop a stray virtual
2744 PHI merging the uses can appear, update last_vdef from
2745 it. */
2746 if (!last_vdef)
2747 last_vdef = gimple_phi_arg_def (vphi, 0);
2748 imm_use_iterator iter;
2749 use_operand_p use_p;
2750 gimple *use_stmt;
2751 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2753 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2754 SET_USE (use_p, last_vdef);
2756 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2757 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2758 gsi = gsi_for_stmt (vphi);
2759 remove_phi_node (&gsi, true);
2762 /* Make stmts member of loop->header and clear range info from all stmts
2763 in BB which is now no longer executed conditional on a predicate we
2764 could have derived it from. */
2765 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2767 gimple *stmt = gsi_stmt (gsi);
2768 gimple_set_bb (stmt, merge_target_bb);
2769 /* Update virtual operands. */
2770 if (last_vdef)
2772 use_operand_p use_p = ssa_vuse_operand (stmt);
2773 if (use_p
2774 && USE_FROM_PTR (use_p) != last_vdef)
2775 SET_USE (use_p, last_vdef);
2776 if (gimple_vdef (stmt))
2777 last_vdef = gimple_vdef (stmt);
2779 else
2780 /* If this is the first load we arrive at update last_vdef
2781 so we handle stray PHIs correctly. */
2782 last_vdef = gimple_vuse (stmt);
2783 if (predicated[i])
2785 ssa_op_iter i;
2786 tree op;
2787 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2788 reset_flow_sensitive_info (op);
2792 /* Update stmt list. */
2793 last = gsi_last_bb (merge_target_bb);
2794 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2795 set_bb_seq (bb, NULL);
2798 /* Fixup virtual operands in the exit block. */
2799 if (exit_bb
2800 && exit_bb != loop->header)
2802 /* We release virtual PHIs late because we have to propagate them
2803 out using the current VUSE. The def might be the one used
2804 after the loop. */
2805 vphi = get_virtual_phi (exit_bb);
2806 if (vphi)
2808 /* When there's just loads inside the loop a stray virtual
2809 PHI merging the uses can appear, update last_vdef from
2810 it. */
2811 if (!last_vdef)
2812 last_vdef = gimple_phi_arg_def (vphi, 0);
2813 imm_use_iterator iter;
2814 use_operand_p use_p;
2815 gimple *use_stmt;
2816 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2818 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2819 SET_USE (use_p, last_vdef);
2821 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2822 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2823 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2824 remove_phi_node (&gsi, true);
2828 /* Now remove all the edges in the loop, except for those from the exit
2829 block and delete the blocks we elided. */
2830 for (i = 1; i < orig_loop_num_nodes; i++)
2832 bb = ifc_bbs[i];
2834 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2836 if (e->src == exit_bb)
2837 ei_next (&ei);
2838 else
2839 remove_edge (e);
2842 for (i = 1; i < orig_loop_num_nodes; i++)
2844 bb = ifc_bbs[i];
2846 if (bb == exit_bb || bb == loop->latch)
2847 continue;
2849 delete_basic_block (bb);
2852 /* Re-connect the exit block. */
2853 if (exit_bb != NULL)
2855 if (exit_bb != loop->header)
2857 /* Connect this node to loop header. */
2858 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2859 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2862 /* Redirect non-exit edges to loop->latch. */
2863 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2865 if (!loop_exit_edge_p (loop, e))
2866 redirect_edge_and_branch (e, loop->latch);
2868 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2870 else
2872 /* If the loop does not have an exit, reconnect header and latch. */
2873 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2874 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2877 /* If possible, merge loop header to the block with the exit edge.
2878 This reduces the number of basic blocks to two, to please the
2879 vectorizer that handles only loops with two nodes. */
2880 if (exit_bb
2881 && exit_bb != loop->header)
2883 if (can_merge_blocks_p (loop->header, exit_bb))
2884 merge_blocks (loop->header, exit_bb);
2887 free (ifc_bbs);
2888 ifc_bbs = NULL;
2889 free (predicated);
2892 /* Version LOOP before if-converting it; the original loop
2893 will be if-converted, the new copy of the loop will not,
2894 and the LOOP_VECTORIZED internal call will be guarding which
2895 loop to execute. The vectorizer pass will fold this
2896 internal call into either true or false.
2898 Note that this function intentionally invalidates profile. Both edges
2899 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2900 consistent after the condition is folded in the vectorizer. */
2902 static class loop *
2903 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2905 basic_block cond_bb;
2906 tree cond = make_ssa_name (boolean_type_node);
2907 class loop *new_loop;
2908 gimple *g;
2909 gimple_stmt_iterator gsi;
2910 unsigned int save_length;
2912 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2913 build_int_cst (integer_type_node, loop->num),
2914 integer_zero_node);
2915 gimple_call_set_lhs (g, cond);
2917 /* Save BB->aux around loop_version as that uses the same field. */
2918 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2919 void **saved_preds = XALLOCAVEC (void *, save_length);
2920 for (unsigned i = 0; i < save_length; i++)
2921 saved_preds[i] = ifc_bbs[i]->aux;
2923 initialize_original_copy_tables ();
2924 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2925 is re-merged in the vectorizer. */
2926 new_loop = loop_version (loop, cond, &cond_bb,
2927 profile_probability::always (),
2928 profile_probability::always (),
2929 profile_probability::always (),
2930 profile_probability::always (), true);
2931 free_original_copy_tables ();
2933 for (unsigned i = 0; i < save_length; i++)
2934 ifc_bbs[i]->aux = saved_preds[i];
2936 if (new_loop == NULL)
2937 return NULL;
2939 new_loop->dont_vectorize = true;
2940 new_loop->force_vectorize = false;
2941 gsi = gsi_last_bb (cond_bb);
2942 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2943 if (preds)
2944 preds->safe_push (g);
2945 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2946 update_ssa (TODO_update_ssa);
2947 return new_loop;
2950 /* Return true when LOOP satisfies the follow conditions that will
2951 allow it to be recognized by the vectorizer for outer-loop
2952 vectorization:
2953 - The loop is not the root node of the loop tree.
2954 - The loop has exactly one inner loop.
2955 - The loop has a single exit.
2956 - The loop header has a single successor, which is the inner
2957 loop header.
2958 - Each of the inner and outer loop latches have a single
2959 predecessor.
2960 - The loop exit block has a single predecessor, which is the
2961 inner loop's exit block. */
2963 static bool
2964 versionable_outer_loop_p (class loop *loop)
2966 if (!loop_outer (loop)
2967 || loop->dont_vectorize
2968 || !loop->inner
2969 || loop->inner->next
2970 || !single_exit (loop)
2971 || !single_succ_p (loop->header)
2972 || single_succ (loop->header) != loop->inner->header
2973 || !single_pred_p (loop->latch)
2974 || !single_pred_p (loop->inner->latch))
2975 return false;
2977 basic_block outer_exit = single_pred (loop->latch);
2978 basic_block inner_exit = single_pred (loop->inner->latch);
2980 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2981 return false;
2983 if (dump_file)
2984 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2986 return true;
2989 /* Performs splitting of critical edges. Skip splitting and return false
2990 if LOOP will not be converted because:
2992 - LOOP is not well formed.
2993 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2995 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
2997 static bool
2998 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3000 basic_block *body;
3001 basic_block bb;
3002 unsigned int num = loop->num_nodes;
3003 unsigned int i;
3004 gimple *stmt;
3005 edge e;
3006 edge_iterator ei;
3007 auto_vec<edge> critical_edges;
3009 /* Loop is not well formed. */
3010 if (num <= 2 || loop->inner || !single_exit (loop))
3011 return false;
3013 body = get_loop_body (loop);
3014 for (i = 0; i < num; i++)
3016 bb = body[i];
3017 if (!aggressive_if_conv
3018 && phi_nodes (bb)
3019 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3021 if (dump_file && (dump_flags & TDF_DETAILS))
3022 fprintf (dump_file,
3023 "BB %d has complicated PHI with more than %u args.\n",
3024 bb->index, MAX_PHI_ARG_NUM);
3026 free (body);
3027 return false;
3029 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3030 continue;
3032 stmt = last_stmt (bb);
3033 /* Skip basic blocks not ending with conditional branch. */
3034 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3035 continue;
3037 FOR_EACH_EDGE (e, ei, bb->succs)
3038 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3039 critical_edges.safe_push (e);
3041 free (body);
3043 while (critical_edges.length () > 0)
3045 e = critical_edges.pop ();
3046 /* Don't split if bb can be predicated along non-critical edge. */
3047 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3048 split_edge (e);
3051 return true;
3054 /* Delete redundant statements produced by predication which prevents
3055 loop vectorization. */
3057 static void
3058 ifcvt_local_dce (class loop *loop)
3060 gimple *stmt;
3061 gimple *stmt1;
3062 gimple *phi;
3063 gimple_stmt_iterator gsi;
3064 auto_vec<gimple *> worklist;
3065 enum gimple_code code;
3066 use_operand_p use_p;
3067 imm_use_iterator imm_iter;
3069 /* The loop has a single BB only. */
3070 basic_block bb = loop->header;
3071 tree latch_vdef = NULL_TREE;
3073 worklist.create (64);
3074 /* Consider all phi as live statements. */
3075 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3077 phi = gsi_stmt (gsi);
3078 gimple_set_plf (phi, GF_PLF_2, true);
3079 worklist.safe_push (phi);
3080 if (virtual_operand_p (gimple_phi_result (phi)))
3081 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3083 /* Consider load/store statements, CALL and COND as live. */
3084 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3086 stmt = gsi_stmt (gsi);
3087 if (is_gimple_debug (stmt))
3089 gimple_set_plf (stmt, GF_PLF_2, true);
3090 continue;
3092 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3094 gimple_set_plf (stmt, GF_PLF_2, true);
3095 worklist.safe_push (stmt);
3096 continue;
3098 code = gimple_code (stmt);
3099 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3101 gimple_set_plf (stmt, GF_PLF_2, true);
3102 worklist.safe_push (stmt);
3103 continue;
3105 gimple_set_plf (stmt, GF_PLF_2, false);
3107 if (code == GIMPLE_ASSIGN)
3109 tree lhs = gimple_assign_lhs (stmt);
3110 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3112 stmt1 = USE_STMT (use_p);
3113 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3115 gimple_set_plf (stmt, GF_PLF_2, true);
3116 worklist.safe_push (stmt);
3117 break;
3122 /* Propagate liveness through arguments of live stmt. */
3123 while (worklist.length () > 0)
3125 ssa_op_iter iter;
3126 use_operand_p use_p;
3127 tree use;
3129 stmt = worklist.pop ();
3130 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3132 use = USE_FROM_PTR (use_p);
3133 if (TREE_CODE (use) != SSA_NAME)
3134 continue;
3135 stmt1 = SSA_NAME_DEF_STMT (use);
3136 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3137 continue;
3138 gimple_set_plf (stmt1, GF_PLF_2, true);
3139 worklist.safe_push (stmt1);
3142 /* Delete dead statements. */
3143 gsi = gsi_last_bb (bb);
3144 while (!gsi_end_p (gsi))
3146 gimple_stmt_iterator gsiprev = gsi;
3147 gsi_prev (&gsiprev);
3148 stmt = gsi_stmt (gsi);
3149 if (gimple_store_p (stmt))
3151 tree lhs = gimple_get_lhs (stmt);
3152 ao_ref write;
3153 ao_ref_init (&write, lhs);
3155 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3156 == DSE_STORE_DEAD)
3157 delete_dead_or_redundant_assignment (&gsi, "dead");
3158 gsi = gsiprev;
3159 continue;
3162 if (gimple_plf (stmt, GF_PLF_2))
3164 gsi = gsiprev;
3165 continue;
3167 if (dump_file && (dump_flags & TDF_DETAILS))
3169 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3170 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3172 gsi_remove (&gsi, true);
3173 release_defs (stmt);
3174 gsi = gsiprev;
3178 /* If-convert LOOP when it is legal. For the moment this pass has no
3179 profitability analysis. Returns non-zero todo flags when something
3180 changed. */
3182 unsigned int
3183 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3185 unsigned int todo = 0;
3186 bool aggressive_if_conv;
3187 class loop *rloop;
3188 bitmap exit_bbs;
3189 edge pe;
3191 again:
3192 rloop = NULL;
3193 ifc_bbs = NULL;
3194 need_to_predicate = false;
3195 need_to_rewrite_undefined = false;
3196 any_complicated_phi = false;
3198 /* Apply more aggressive if-conversion when loop or its outer loop were
3199 marked with simd pragma. When that's the case, we try to if-convert
3200 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3201 aggressive_if_conv = loop->force_vectorize;
3202 if (!aggressive_if_conv)
3204 class loop *outer_loop = loop_outer (loop);
3205 if (outer_loop && outer_loop->force_vectorize)
3206 aggressive_if_conv = true;
3209 if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
3210 goto cleanup;
3212 if (!if_convertible_loop_p (loop)
3213 || !dbg_cnt (if_conversion_tree))
3214 goto cleanup;
3216 if ((need_to_predicate || any_complicated_phi)
3217 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3218 || loop->dont_vectorize))
3219 goto cleanup;
3221 /* The edge to insert invariant stmts on. */
3222 pe = loop_preheader_edge (loop);
3224 /* Since we have no cost model, always version loops unless the user
3225 specified -ftree-loop-if-convert or unless versioning is required.
3226 Either version this loop, or if the pattern is right for outer-loop
3227 vectorization, version the outer loop. In the latter case we will
3228 still if-convert the original inner loop. */
3229 if (need_to_predicate
3230 || any_complicated_phi
3231 || flag_tree_loop_if_convert != 1)
3233 class loop *vloop
3234 = (versionable_outer_loop_p (loop_outer (loop))
3235 ? loop_outer (loop) : loop);
3236 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3237 if (nloop == NULL)
3238 goto cleanup;
3239 if (vloop != loop)
3241 /* If versionable_outer_loop_p decided to version the
3242 outer loop, version also the inner loop of the non-vectorized
3243 loop copy. So we transform:
3244 loop1
3245 loop2
3246 into:
3247 if (LOOP_VECTORIZED (1, 3))
3249 loop1
3250 loop2
3252 else
3253 loop3 (copy of loop1)
3254 if (LOOP_VECTORIZED (4, 5))
3255 loop4 (copy of loop2)
3256 else
3257 loop5 (copy of loop4) */
3258 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3259 rloop = nloop->inner;
3261 else
3262 /* If we versioned loop then make sure to insert invariant
3263 stmts before the .LOOP_VECTORIZED check since the vectorizer
3264 will re-use that for things like runtime alias versioning
3265 whose condition can end up using those invariants. */
3266 pe = single_pred_edge (gimple_bb (preds->last ()));
3269 /* Now all statements are if-convertible. Combine all the basic
3270 blocks into one huge basic block doing the if-conversion
3271 on-the-fly. */
3272 combine_blocks (loop, pe);
3274 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3275 and stores are involved. CSE only the loop body, not the entry
3276 PHIs, those are to be kept in sync with the non-if-converted copy.
3277 ??? We'll still keep dead stores though. */
3278 exit_bbs = BITMAP_ALLOC (NULL);
3279 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3280 bitmap_set_bit (exit_bbs, loop->latch->index);
3282 std::pair <tree, tree> *name_pair;
3283 unsigned ssa_names_idx;
3284 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3285 replace_uses_by (name_pair->first, name_pair->second);
3286 redundant_ssa_names.release ();
3288 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3290 /* Delete dead predicate computations. */
3291 ifcvt_local_dce (loop);
3292 BITMAP_FREE (exit_bbs);
3294 todo |= TODO_cleanup_cfg;
3296 cleanup:
3297 if (ifc_bbs)
3299 unsigned int i;
3301 for (i = 0; i < loop->num_nodes; i++)
3302 free_bb_predicate (ifc_bbs[i]);
3304 free (ifc_bbs);
3305 ifc_bbs = NULL;
3307 if (rloop != NULL)
3309 loop = rloop;
3310 goto again;
3313 return todo;
3316 /* Tree if-conversion pass management. */
3318 namespace {
3320 const pass_data pass_data_if_conversion =
3322 GIMPLE_PASS, /* type */
3323 "ifcvt", /* name */
3324 OPTGROUP_NONE, /* optinfo_flags */
3325 TV_TREE_LOOP_IFCVT, /* tv_id */
3326 ( PROP_cfg | PROP_ssa ), /* properties_required */
3327 0, /* properties_provided */
3328 0, /* properties_destroyed */
3329 0, /* todo_flags_start */
3330 0, /* todo_flags_finish */
3333 class pass_if_conversion : public gimple_opt_pass
3335 public:
3336 pass_if_conversion (gcc::context *ctxt)
3337 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3340 /* opt_pass methods: */
3341 virtual bool gate (function *);
3342 virtual unsigned int execute (function *);
3344 }; // class pass_if_conversion
3346 bool
3347 pass_if_conversion::gate (function *fun)
3349 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3350 && flag_tree_loop_if_convert != 0)
3351 || flag_tree_loop_if_convert == 1);
3354 unsigned int
3355 pass_if_conversion::execute (function *fun)
3357 unsigned todo = 0;
3359 if (number_of_loops (fun) <= 1)
3360 return 0;
3362 auto_vec<gimple *> preds;
3363 for (auto loop : loops_list (cfun, 0))
3364 if (flag_tree_loop_if_convert == 1
3365 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3366 && !loop->dont_vectorize))
3367 todo |= tree_if_conversion (loop, &preds);
3369 if (todo)
3371 free_numbers_of_iterations_estimates (fun);
3372 scev_reset ();
3375 if (flag_checking)
3377 basic_block bb;
3378 FOR_EACH_BB_FN (bb, fun)
3379 gcc_assert (!bb->aux);
3382 /* Perform IL update now, it might elide some loops. */
3383 if (todo & TODO_cleanup_cfg)
3385 cleanup_tree_cfg ();
3386 if (need_ssa_update_p (fun))
3387 todo |= TODO_update_ssa;
3389 if (todo & TODO_update_ssa_any)
3390 update_ssa (todo & TODO_update_ssa_any);
3392 /* If if-conversion elided the loop fall back to the original one. */
3393 for (unsigned i = 0; i < preds.length (); ++i)
3395 gimple *g = preds[i];
3396 if (!gimple_bb (g))
3397 continue;
3398 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3399 if (!get_loop (fun, ifcvt_loop))
3401 if (dump_file)
3402 fprintf (dump_file, "If-converted loop vanished\n");
3403 fold_loop_internal_call (g, boolean_false_node);
3407 return 0;
3410 } // anon namespace
3412 gimple_opt_pass *
3413 make_pass_if_conversion (gcc::context *ctxt)
3415 return new pass_if_conversion (ctxt);