2015-07-02 Steven G. Kargl <kargl@gcc.gnu.org>
[official-gcc.git] / gcc / tree-if-conv.c
blob003f1ddecd2e312e2009907dc1a3111851410b74
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "tm.h"
87 #include "alias.h"
88 #include "symtab.h"
89 #include "tree.h"
90 #include "fold-const.h"
91 #include "stor-layout.h"
92 #include "flags.h"
93 #include "predict.h"
94 #include "hard-reg-set.h"
95 #include "function.h"
96 #include "dominance.h"
97 #include "cfg.h"
98 #include "basic-block.h"
99 #include "gimple-pretty-print.h"
100 #include "tree-ssa-alias.h"
101 #include "internal-fn.h"
102 #include "gimple-fold.h"
103 #include "gimple-expr.h"
104 #include "gimple.h"
105 #include "gimplify.h"
106 #include "gimple-iterator.h"
107 #include "gimplify-me.h"
108 #include "gimple-ssa.h"
109 #include "tree-cfg.h"
110 #include "tree-phinodes.h"
111 #include "ssa-iterators.h"
112 #include "stringpool.h"
113 #include "tree-ssanames.h"
114 #include "tree-into-ssa.h"
115 #include "tree-ssa.h"
116 #include "cfgloop.h"
117 #include "tree-chrec.h"
118 #include "tree-data-ref.h"
119 #include "tree-scalar-evolution.h"
120 #include "tree-ssa-loop-ivopts.h"
121 #include "tree-ssa-address.h"
122 #include "tree-pass.h"
123 #include "dbgcnt.h"
124 #include "rtl.h"
125 #include "insn-config.h"
126 #include "expmed.h"
127 #include "dojump.h"
128 #include "explow.h"
129 #include "calls.h"
130 #include "emit-rtl.h"
131 #include "varasm.h"
132 #include "stmt.h"
133 #include "expr.h"
134 #include "insn-codes.h"
135 #include "optabs.h"
136 #include "tree-hash-traits.h"
138 /* List of basic blocks in if-conversion-suitable order. */
139 static basic_block *ifc_bbs;
141 /* Apply more aggressive (extended) if-conversion if true. */
142 static bool aggressive_if_conv;
144 /* Structure used to predicate basic blocks. This is attached to the
145 ->aux field of the BBs in the loop to be if-converted. */
146 typedef struct bb_predicate_s {
148 /* The condition under which this basic block is executed. */
149 tree predicate;
151 /* PREDICATE is gimplified, and the sequence of statements is
152 recorded here, in order to avoid the duplication of computations
153 that occur in previous conditions. See PR44483. */
154 gimple_seq predicate_gimplified_stmts;
155 } *bb_predicate_p;
157 /* Returns true when the basic block BB has a predicate. */
159 static inline bool
160 bb_has_predicate (basic_block bb)
162 return bb->aux != NULL;
165 /* Returns the gimplified predicate for basic block BB. */
167 static inline tree
168 bb_predicate (basic_block bb)
170 return ((bb_predicate_p) bb->aux)->predicate;
173 /* Sets the gimplified predicate COND for basic block BB. */
175 static inline void
176 set_bb_predicate (basic_block bb, tree cond)
178 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
179 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
180 || is_gimple_condexpr (cond));
181 ((bb_predicate_p) bb->aux)->predicate = cond;
184 /* Returns the sequence of statements of the gimplification of the
185 predicate for basic block BB. */
187 static inline gimple_seq
188 bb_predicate_gimplified_stmts (basic_block bb)
190 return ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts;
193 /* Sets the sequence of statements STMTS of the gimplification of the
194 predicate for basic block BB. */
196 static inline void
197 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
199 ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts = stmts;
202 /* Adds the sequence of statements STMTS to the sequence of statements
203 of the predicate for basic block BB. */
205 static inline void
206 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
208 gimple_seq_add_seq
209 (&(((bb_predicate_p) bb->aux)->predicate_gimplified_stmts), stmts);
212 /* Initializes to TRUE the predicate of basic block BB. */
214 static inline void
215 init_bb_predicate (basic_block bb)
217 bb->aux = XNEW (struct bb_predicate_s);
218 set_bb_predicate_gimplified_stmts (bb, NULL);
219 set_bb_predicate (bb, boolean_true_node);
222 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
223 but don't actually free it. */
225 static inline void
226 release_bb_predicate (basic_block bb)
228 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
229 if (stmts)
231 gimple_stmt_iterator i;
233 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
234 free_stmt_operands (cfun, gsi_stmt (i));
235 set_bb_predicate_gimplified_stmts (bb, NULL);
239 /* Free the predicate of basic block BB. */
241 static inline void
242 free_bb_predicate (basic_block bb)
244 if (!bb_has_predicate (bb))
245 return;
247 release_bb_predicate (bb);
248 free (bb->aux);
249 bb->aux = NULL;
252 /* Reinitialize predicate of BB with the true predicate. */
254 static inline void
255 reset_bb_predicate (basic_block bb)
257 if (!bb_has_predicate (bb))
258 init_bb_predicate (bb);
259 else
261 release_bb_predicate (bb);
262 set_bb_predicate (bb, boolean_true_node);
266 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
267 the expression EXPR. Inserts the statement created for this
268 computation before GSI and leaves the iterator GSI at the same
269 statement. */
271 static tree
272 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
274 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
275 gimple stmt = gimple_build_assign (new_name, expr);
276 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
277 return new_name;
280 /* Return true when COND is a true predicate. */
282 static inline bool
283 is_true_predicate (tree cond)
285 return (cond == NULL_TREE
286 || cond == boolean_true_node
287 || integer_onep (cond));
290 /* Returns true when BB has a predicate that is not trivial: true or
291 NULL_TREE. */
293 static inline bool
294 is_predicated (basic_block bb)
296 return !is_true_predicate (bb_predicate (bb));
299 /* Parses the predicate COND and returns its comparison code and
300 operands OP0 and OP1. */
302 static enum tree_code
303 parse_predicate (tree cond, tree *op0, tree *op1)
305 gimple s;
307 if (TREE_CODE (cond) == SSA_NAME
308 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
310 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
312 *op0 = gimple_assign_rhs1 (s);
313 *op1 = gimple_assign_rhs2 (s);
314 return gimple_assign_rhs_code (s);
317 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
319 tree op = gimple_assign_rhs1 (s);
320 tree type = TREE_TYPE (op);
321 enum tree_code code = parse_predicate (op, op0, op1);
323 return code == ERROR_MARK ? ERROR_MARK
324 : invert_tree_comparison (code, HONOR_NANS (type));
327 return ERROR_MARK;
330 if (COMPARISON_CLASS_P (cond))
332 *op0 = TREE_OPERAND (cond, 0);
333 *op1 = TREE_OPERAND (cond, 1);
334 return TREE_CODE (cond);
337 return ERROR_MARK;
340 /* Returns the fold of predicate C1 OR C2 at location LOC. */
342 static tree
343 fold_or_predicates (location_t loc, tree c1, tree c2)
345 tree op1a, op1b, op2a, op2b;
346 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
347 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
349 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
351 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
352 code2, op2a, op2b);
353 if (t)
354 return t;
357 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
360 /* Returns true if N is either a constant or a SSA_NAME. */
362 static bool
363 constant_or_ssa_name (tree n)
365 switch (TREE_CODE (n))
367 case SSA_NAME:
368 case INTEGER_CST:
369 case REAL_CST:
370 case COMPLEX_CST:
371 case VECTOR_CST:
372 return true;
373 default:
374 return false;
378 /* Returns either a COND_EXPR or the folded expression if the folded
379 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
380 a constant or a SSA_NAME. */
382 static tree
383 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
385 tree rhs1, lhs1, cond_expr;
387 /* If COND is comparison r != 0 and r has boolean type, convert COND
388 to SSA_NAME to accept by vect bool pattern. */
389 if (TREE_CODE (cond) == NE_EXPR)
391 tree op0 = TREE_OPERAND (cond, 0);
392 tree op1 = TREE_OPERAND (cond, 1);
393 if (TREE_CODE (op0) == SSA_NAME
394 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
395 && (integer_zerop (op1)))
396 cond = op0;
398 cond_expr = fold_ternary (COND_EXPR, type, cond,
399 rhs, lhs);
401 if (cond_expr == NULL_TREE)
402 return build3 (COND_EXPR, type, cond, rhs, lhs);
404 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
406 if (constant_or_ssa_name (cond_expr))
407 return cond_expr;
409 if (TREE_CODE (cond_expr) == ABS_EXPR)
411 rhs1 = TREE_OPERAND (cond_expr, 1);
412 STRIP_USELESS_TYPE_CONVERSION (rhs1);
413 if (constant_or_ssa_name (rhs1))
414 return build1 (ABS_EXPR, type, rhs1);
417 if (TREE_CODE (cond_expr) == MIN_EXPR
418 || TREE_CODE (cond_expr) == MAX_EXPR)
420 lhs1 = TREE_OPERAND (cond_expr, 0);
421 STRIP_USELESS_TYPE_CONVERSION (lhs1);
422 rhs1 = TREE_OPERAND (cond_expr, 1);
423 STRIP_USELESS_TYPE_CONVERSION (rhs1);
424 if (constant_or_ssa_name (rhs1)
425 && constant_or_ssa_name (lhs1))
426 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
428 return build3 (COND_EXPR, type, cond, rhs, lhs);
431 /* Add condition NC to the predicate list of basic block BB. LOOP is
432 the loop to be if-converted. Use predicate of cd-equivalent block
433 for join bb if it exists: we call basic blocks bb1 and bb2
434 cd-equivalent if they are executed under the same condition. */
436 static inline void
437 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
439 tree bc, *tp;
440 basic_block dom_bb;
442 if (is_true_predicate (nc))
443 return;
445 /* If dominance tells us this basic block is always executed,
446 don't record any predicates for it. */
447 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
448 return;
450 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
451 /* We use notion of cd equivalence to get simpler predicate for
452 join block, e.g. if join block has 2 predecessors with predicates
453 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
454 p1 & p2 | p1 & !p2. */
455 if (dom_bb != loop->header
456 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
458 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
459 bc = bb_predicate (dom_bb);
460 if (!is_true_predicate (bc))
461 set_bb_predicate (bb, bc);
462 else
463 gcc_assert (is_true_predicate (bb_predicate (bb)));
464 if (dump_file && (dump_flags & TDF_DETAILS))
465 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
466 dom_bb->index, bb->index);
467 return;
470 if (!is_predicated (bb))
471 bc = nc;
472 else
474 bc = bb_predicate (bb);
475 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
476 if (is_true_predicate (bc))
478 reset_bb_predicate (bb);
479 return;
483 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
484 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
485 tp = &TREE_OPERAND (bc, 0);
486 else
487 tp = &bc;
488 if (!is_gimple_condexpr (*tp))
490 gimple_seq stmts;
491 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
492 add_bb_predicate_gimplified_stmts (bb, stmts);
494 set_bb_predicate (bb, bc);
497 /* Add the condition COND to the previous condition PREV_COND, and add
498 this to the predicate list of the destination of edge E. LOOP is
499 the loop to be if-converted. */
501 static void
502 add_to_dst_predicate_list (struct loop *loop, edge e,
503 tree prev_cond, tree cond)
505 if (!flow_bb_inside_loop_p (loop, e->dest))
506 return;
508 if (!is_true_predicate (prev_cond))
509 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
510 prev_cond, cond);
512 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
513 add_to_predicate_list (loop, e->dest, cond);
516 /* Return true if one of the successor edges of BB exits LOOP. */
518 static bool
519 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
521 edge e;
522 edge_iterator ei;
524 FOR_EACH_EDGE (e, ei, bb->succs)
525 if (loop_exit_edge_p (loop, e))
526 return true;
528 return false;
531 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
532 and it belongs to basic block BB.
534 PHI is not if-convertible if:
535 - it has more than 2 arguments.
537 When the flag_tree_loop_if_convert_stores is not set, PHI is not
538 if-convertible if:
539 - a virtual PHI is immediately used in another PHI node,
540 - there is a virtual PHI in a BB other than the loop->header.
541 When the aggressive_if_conv is set, PHI can have more than
542 two arguments. */
544 static bool
545 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
546 bool any_mask_load_store)
548 if (dump_file && (dump_flags & TDF_DETAILS))
550 fprintf (dump_file, "-------------------------\n");
551 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
554 if (bb != loop->header)
556 if (gimple_phi_num_args (phi) != 2
557 && !aggressive_if_conv)
559 if (dump_file && (dump_flags & TDF_DETAILS))
560 fprintf (dump_file, "More than two phi node args.\n");
561 return false;
565 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
566 return true;
568 /* When the flag_tree_loop_if_convert_stores is not set, check
569 that there are no memory writes in the branches of the loop to be
570 if-converted. */
571 if (virtual_operand_p (gimple_phi_result (phi)))
573 imm_use_iterator imm_iter;
574 use_operand_p use_p;
576 if (bb != loop->header)
578 if (dump_file && (dump_flags & TDF_DETAILS))
579 fprintf (dump_file, "Virtual phi not on loop->header.\n");
580 return false;
583 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
585 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
586 && USE_STMT (use_p) != (gimple) phi)
588 if (dump_file && (dump_flags & TDF_DETAILS))
589 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
590 return false;
595 return true;
598 /* Records the status of a data reference. This struct is attached to
599 each DR->aux field. */
601 struct ifc_dr {
602 /* -1 when not initialized, 0 when false, 1 when true. */
603 int written_at_least_once;
605 /* -1 when not initialized, 0 when false, 1 when true. */
606 int rw_unconditionally;
609 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
610 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
611 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
613 /* Returns true when the memory references of STMT are read or written
614 unconditionally. In other words, this function returns true when
615 for every data reference A in STMT there exist other accesses to
616 a data reference with the same base with predicates that add up (OR-up) to
617 the true predicate: this ensures that the data reference A is touched
618 (read or written) on every iteration of the if-converted loop. */
620 static bool
621 memrefs_read_or_written_unconditionally (gimple stmt,
622 vec<data_reference_p> drs)
624 int i, j;
625 data_reference_p a, b;
626 tree ca = bb_predicate (gimple_bb (stmt));
628 for (i = 0; drs.iterate (i, &a); i++)
629 if (DR_STMT (a) == stmt)
631 bool found = false;
632 int x = DR_RW_UNCONDITIONALLY (a);
634 if (x == 0)
635 return false;
637 if (x == 1)
638 continue;
640 for (j = 0; drs.iterate (j, &b); j++)
642 tree ref_base_a = DR_REF (a);
643 tree ref_base_b = DR_REF (b);
645 if (DR_STMT (b) == stmt)
646 continue;
648 while (TREE_CODE (ref_base_a) == COMPONENT_REF
649 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
650 || TREE_CODE (ref_base_a) == REALPART_EXPR)
651 ref_base_a = TREE_OPERAND (ref_base_a, 0);
653 while (TREE_CODE (ref_base_b) == COMPONENT_REF
654 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
655 || TREE_CODE (ref_base_b) == REALPART_EXPR)
656 ref_base_b = TREE_OPERAND (ref_base_b, 0);
658 if (!operand_equal_p (ref_base_a, ref_base_b, 0))
660 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
662 if (DR_RW_UNCONDITIONALLY (b) == 1
663 || is_true_predicate (cb)
664 || is_true_predicate (ca
665 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
667 DR_RW_UNCONDITIONALLY (a) = 1;
668 DR_RW_UNCONDITIONALLY (b) = 1;
669 found = true;
670 break;
675 if (!found)
677 DR_RW_UNCONDITIONALLY (a) = 0;
678 return false;
682 return true;
685 /* Returns true when the memory references of STMT are unconditionally
686 written. In other words, this function returns true when for every
687 data reference A written in STMT, there exist other writes to the
688 same data reference with predicates that add up (OR-up) to the true
689 predicate: this ensures that the data reference A is written on
690 every iteration of the if-converted loop. */
692 static bool
693 write_memrefs_written_at_least_once (gimple stmt,
694 vec<data_reference_p> drs)
696 int i, j;
697 data_reference_p a, b;
698 tree ca = bb_predicate (gimple_bb (stmt));
700 for (i = 0; drs.iterate (i, &a); i++)
701 if (DR_STMT (a) == stmt
702 && DR_IS_WRITE (a))
704 bool found = false;
705 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
707 if (x == 0)
708 return false;
710 if (x == 1)
711 continue;
713 for (j = 0; drs.iterate (j, &b); j++)
714 if (DR_STMT (b) != stmt
715 && DR_IS_WRITE (b)
716 && same_data_refs_base_objects (a, b))
718 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
720 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
721 || is_true_predicate (cb)
722 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
723 ca, cb)))
725 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
726 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
727 found = true;
728 break;
732 if (!found)
734 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
735 return false;
739 return true;
742 /* Return true when the memory references of STMT won't trap in the
743 if-converted code. There are two things that we have to check for:
745 - writes to memory occur to writable memory: if-conversion of
746 memory writes transforms the conditional memory writes into
747 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
748 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
749 be executed at all in the original code, it may be a readonly
750 memory. To check that A is not const-qualified, we check that
751 there exists at least an unconditional write to A in the current
752 function.
754 - reads or writes to memory are valid memory accesses for every
755 iteration. To check that the memory accesses are correctly formed
756 and that we are allowed to read and write in these locations, we
757 check that the memory accesses to be if-converted occur at every
758 iteration unconditionally. */
760 static bool
761 ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
763 return write_memrefs_written_at_least_once (stmt, refs)
764 && memrefs_read_or_written_unconditionally (stmt, refs);
767 /* Wrapper around gimple_could_trap_p refined for the needs of the
768 if-conversion. Try to prove that the memory accesses of STMT could
769 not trap in the innermost loop containing STMT. */
771 static bool
772 ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
774 if (gimple_vuse (stmt)
775 && !gimple_could_trap_p_1 (stmt, false, false)
776 && ifcvt_memrefs_wont_trap (stmt, refs))
777 return false;
779 return gimple_could_trap_p (stmt);
782 /* Return true if STMT could be converted into a masked load or store
783 (conditional load or store based on a mask computed from bb predicate). */
785 static bool
786 ifcvt_can_use_mask_load_store (gimple stmt)
788 tree lhs, ref;
789 machine_mode mode;
790 basic_block bb = gimple_bb (stmt);
791 bool is_load;
793 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
794 || bb->loop_father->dont_vectorize
795 || !gimple_assign_single_p (stmt)
796 || gimple_has_volatile_ops (stmt))
797 return false;
799 /* Check whether this is a load or store. */
800 lhs = gimple_assign_lhs (stmt);
801 if (gimple_store_p (stmt))
803 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
804 return false;
805 is_load = false;
806 ref = lhs;
808 else if (gimple_assign_load_p (stmt))
810 is_load = true;
811 ref = gimple_assign_rhs1 (stmt);
813 else
814 return false;
816 if (may_be_nonaddressable_p (ref))
817 return false;
819 /* Mask should be integer mode of the same size as the load/store
820 mode. */
821 mode = TYPE_MODE (TREE_TYPE (lhs));
822 if (int_mode_for_mode (mode) == BLKmode
823 || VECTOR_MODE_P (mode))
824 return false;
826 if (can_vec_mask_load_store_p (mode, is_load))
827 return true;
829 return false;
832 /* Return true when STMT is if-convertible.
834 GIMPLE_ASSIGN statement is not if-convertible if,
835 - it is not movable,
836 - it could trap,
837 - LHS is not var decl. */
839 static bool
840 if_convertible_gimple_assign_stmt_p (gimple stmt,
841 vec<data_reference_p> refs,
842 bool *any_mask_load_store)
844 tree lhs = gimple_assign_lhs (stmt);
845 basic_block bb;
847 if (dump_file && (dump_flags & TDF_DETAILS))
849 fprintf (dump_file, "-------------------------\n");
850 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
853 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
854 return false;
856 /* Some of these constrains might be too conservative. */
857 if (stmt_ends_bb_p (stmt)
858 || gimple_has_volatile_ops (stmt)
859 || (TREE_CODE (lhs) == SSA_NAME
860 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
861 || gimple_has_side_effects (stmt))
863 if (dump_file && (dump_flags & TDF_DETAILS))
864 fprintf (dump_file, "stmt not suitable for ifcvt\n");
865 return false;
868 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
869 in between if_convertible_loop_p and combine_blocks
870 we can perform loop versioning. */
871 gimple_set_plf (stmt, GF_PLF_2, false);
873 if (flag_tree_loop_if_convert_stores)
875 if (ifcvt_could_trap_p (stmt, refs))
877 if (ifcvt_can_use_mask_load_store (stmt))
879 gimple_set_plf (stmt, GF_PLF_2, true);
880 *any_mask_load_store = true;
881 return true;
883 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "tree could trap...\n");
885 return false;
887 return true;
890 if (gimple_assign_rhs_could_trap_p (stmt))
892 if (ifcvt_can_use_mask_load_store (stmt))
894 gimple_set_plf (stmt, GF_PLF_2, true);
895 *any_mask_load_store = true;
896 return true;
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "tree could trap...\n");
900 return false;
903 bb = gimple_bb (stmt);
905 if (TREE_CODE (lhs) != SSA_NAME
906 && bb != bb->loop_father->header
907 && !bb_with_exit_edge_p (bb->loop_father, bb))
909 if (ifcvt_can_use_mask_load_store (stmt))
911 gimple_set_plf (stmt, GF_PLF_2, true);
912 *any_mask_load_store = true;
913 return true;
915 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "LHS is not var\n");
918 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
920 return false;
923 return true;
926 /* Return true when STMT is if-convertible.
928 A statement is if-convertible if:
929 - it is an if-convertible GIMPLE_ASSIGN,
930 - it is a GIMPLE_LABEL or a GIMPLE_COND,
931 - it is builtins call. */
933 static bool
934 if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
935 bool *any_mask_load_store)
937 switch (gimple_code (stmt))
939 case GIMPLE_LABEL:
940 case GIMPLE_DEBUG:
941 case GIMPLE_COND:
942 return true;
944 case GIMPLE_ASSIGN:
945 return if_convertible_gimple_assign_stmt_p (stmt, refs,
946 any_mask_load_store);
948 case GIMPLE_CALL:
950 tree fndecl = gimple_call_fndecl (stmt);
951 if (fndecl)
953 int flags = gimple_call_flags (stmt);
954 if ((flags & ECF_CONST)
955 && !(flags & ECF_LOOPING_CONST_OR_PURE)
956 /* We can only vectorize some builtins at the moment,
957 so restrict if-conversion to those. */
958 && DECL_BUILT_IN (fndecl))
959 return true;
961 return false;
964 default:
965 /* Don't know what to do with 'em so don't do anything. */
966 if (dump_file && (dump_flags & TDF_DETAILS))
968 fprintf (dump_file, "don't know what to do\n");
969 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
971 return false;
972 break;
975 return true;
978 /* Assumes that BB has more than 1 predecessors.
979 Returns false if at least one successor is not on critical edge
980 and true otherwise. */
982 static inline bool
983 all_preds_critical_p (basic_block bb)
985 edge e;
986 edge_iterator ei;
988 FOR_EACH_EDGE (e, ei, bb->preds)
989 if (EDGE_COUNT (e->src->succs) == 1)
990 return false;
991 return true;
994 /* Returns true if at least one successor in on critical edge. */
995 static inline bool
996 has_pred_critical_p (basic_block bb)
998 edge e;
999 edge_iterator ei;
1001 FOR_EACH_EDGE (e, ei, bb->preds)
1002 if (EDGE_COUNT (e->src->succs) > 1)
1003 return true;
1004 return false;
1007 /* Return true when BB is if-convertible. This routine does not check
1008 basic block's statements and phis.
1010 A basic block is not if-convertible if:
1011 - it is non-empty and it is after the exit block (in BFS order),
1012 - it is after the exit block but before the latch,
1013 - its edges are not normal.
1015 Last restriction is valid if aggressive_if_conv is false.
1017 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1018 inside LOOP. */
1020 static bool
1021 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
1023 edge e;
1024 edge_iterator ei;
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1029 if (EDGE_COUNT (bb->succs) > 2)
1030 return false;
1032 if (EDGE_COUNT (bb->preds) > 2
1033 && !aggressive_if_conv)
1034 return false;
1036 if (exit_bb)
1038 if (bb != loop->latch)
1040 if (dump_file && (dump_flags & TDF_DETAILS))
1041 fprintf (dump_file, "basic block after exit bb but before latch\n");
1042 return false;
1044 else if (!empty_block_p (bb))
1046 if (dump_file && (dump_flags & TDF_DETAILS))
1047 fprintf (dump_file, "non empty basic block after exit bb\n");
1048 return false;
1050 else if (bb == loop->latch
1051 && bb != exit_bb
1052 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1054 if (dump_file && (dump_flags & TDF_DETAILS))
1055 fprintf (dump_file, "latch is not dominated by exit_block\n");
1056 return false;
1060 /* Be less adventurous and handle only normal edges. */
1061 FOR_EACH_EDGE (e, ei, bb->succs)
1062 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Difficult to handle edges\n");
1066 return false;
1069 /* At least one incoming edge has to be non-critical as otherwise edge
1070 predicates are not equal to basic-block predicates of the edge
1071 source. This check is skipped if aggressive_if_conv is true. */
1072 if (!aggressive_if_conv
1073 && EDGE_COUNT (bb->preds) > 1
1074 && bb != loop->header
1075 && all_preds_critical_p (bb))
1077 if (dump_file && (dump_flags & TDF_DETAILS))
1078 fprintf (dump_file, "only critical predecessors\n");
1079 return false;
1082 return true;
1085 /* Return true when all predecessor blocks of BB are visited. The
1086 VISITED bitmap keeps track of the visited blocks. */
1088 static bool
1089 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1091 edge e;
1092 edge_iterator ei;
1093 FOR_EACH_EDGE (e, ei, bb->preds)
1094 if (!bitmap_bit_p (*visited, e->src->index))
1095 return false;
1097 return true;
1100 /* Get body of a LOOP in suitable order for if-conversion. It is
1101 caller's responsibility to deallocate basic block list.
1102 If-conversion suitable order is, breadth first sort (BFS) order
1103 with an additional constraint: select a block only if all its
1104 predecessors are already selected. */
1106 static basic_block *
1107 get_loop_body_in_if_conv_order (const struct loop *loop)
1109 basic_block *blocks, *blocks_in_bfs_order;
1110 basic_block bb;
1111 bitmap visited;
1112 unsigned int index = 0;
1113 unsigned int visited_count = 0;
1115 gcc_assert (loop->num_nodes);
1116 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1118 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1119 visited = BITMAP_ALLOC (NULL);
1121 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1123 index = 0;
1124 while (index < loop->num_nodes)
1126 bb = blocks_in_bfs_order [index];
1128 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1130 free (blocks_in_bfs_order);
1131 BITMAP_FREE (visited);
1132 free (blocks);
1133 return NULL;
1136 if (!bitmap_bit_p (visited, bb->index))
1138 if (pred_blocks_visited_p (bb, &visited)
1139 || bb == loop->header)
1141 /* This block is now visited. */
1142 bitmap_set_bit (visited, bb->index);
1143 blocks[visited_count++] = bb;
1147 index++;
1149 if (index == loop->num_nodes
1150 && visited_count != loop->num_nodes)
1151 /* Not done yet. */
1152 index = 0;
1154 free (blocks_in_bfs_order);
1155 BITMAP_FREE (visited);
1156 return blocks;
1159 /* Returns true when the analysis of the predicates for all the basic
1160 blocks in LOOP succeeded.
1162 predicate_bbs first allocates the predicates of the basic blocks.
1163 These fields are then initialized with the tree expressions
1164 representing the predicates under which a basic block is executed
1165 in the LOOP. As the loop->header is executed at each iteration, it
1166 has the "true" predicate. Other statements executed under a
1167 condition are predicated with that condition, for example
1169 | if (x)
1170 | S1;
1171 | else
1172 | S2;
1174 S1 will be predicated with "x", and
1175 S2 will be predicated with "!x". */
1177 static void
1178 predicate_bbs (loop_p loop)
1180 unsigned int i;
1182 for (i = 0; i < loop->num_nodes; i++)
1183 init_bb_predicate (ifc_bbs[i]);
1185 for (i = 0; i < loop->num_nodes; i++)
1187 basic_block bb = ifc_bbs[i];
1188 tree cond;
1189 gimple stmt;
1191 /* The loop latch and loop exit block are always executed and
1192 have no extra conditions to be processed: skip them. */
1193 if (bb == loop->latch
1194 || bb_with_exit_edge_p (loop, bb))
1196 reset_bb_predicate (bb);
1197 continue;
1200 cond = bb_predicate (bb);
1201 stmt = last_stmt (bb);
1202 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1204 tree c2;
1205 edge true_edge, false_edge;
1206 location_t loc = gimple_location (stmt);
1207 tree c = build2_loc (loc, gimple_cond_code (stmt),
1208 boolean_type_node,
1209 gimple_cond_lhs (stmt),
1210 gimple_cond_rhs (stmt));
1212 /* Add new condition into destination's predicate list. */
1213 extract_true_false_edges_from_block (gimple_bb (stmt),
1214 &true_edge, &false_edge);
1216 /* If C is true, then TRUE_EDGE is taken. */
1217 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1218 unshare_expr (c));
1220 /* If C is false, then FALSE_EDGE is taken. */
1221 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1222 unshare_expr (c));
1223 add_to_dst_predicate_list (loop, false_edge,
1224 unshare_expr (cond), c2);
1226 cond = NULL_TREE;
1229 /* If current bb has only one successor, then consider it as an
1230 unconditional goto. */
1231 if (single_succ_p (bb))
1233 basic_block bb_n = single_succ (bb);
1235 /* The successor bb inherits the predicate of its
1236 predecessor. If there is no predicate in the predecessor
1237 bb, then consider the successor bb as always executed. */
1238 if (cond == NULL_TREE)
1239 cond = boolean_true_node;
1241 add_to_predicate_list (loop, bb_n, cond);
1245 /* The loop header is always executed. */
1246 reset_bb_predicate (loop->header);
1247 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1248 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1251 /* Return true when LOOP is if-convertible. This is a helper function
1252 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1253 in if_convertible_loop_p. */
1255 static bool
1256 if_convertible_loop_p_1 (struct loop *loop,
1257 vec<loop_p> *loop_nest,
1258 vec<data_reference_p> *refs,
1259 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1261 bool res;
1262 unsigned int i;
1263 basic_block exit_bb = NULL;
1265 /* Don't if-convert the loop when the data dependences cannot be
1266 computed: the loop won't be vectorized in that case. */
1267 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1268 if (!res)
1269 return false;
1271 calculate_dominance_info (CDI_DOMINATORS);
1272 calculate_dominance_info (CDI_POST_DOMINATORS);
1274 /* Allow statements that can be handled during if-conversion. */
1275 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1276 if (!ifc_bbs)
1278 if (dump_file && (dump_flags & TDF_DETAILS))
1279 fprintf (dump_file, "Irreducible loop\n");
1280 return false;
1283 for (i = 0; i < loop->num_nodes; i++)
1285 basic_block bb = ifc_bbs[i];
1287 if (!if_convertible_bb_p (loop, bb, exit_bb))
1288 return false;
1290 if (bb_with_exit_edge_p (loop, bb))
1291 exit_bb = bb;
1294 for (i = 0; i < loop->num_nodes; i++)
1296 basic_block bb = ifc_bbs[i];
1297 gimple_stmt_iterator gsi;
1299 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1300 switch (gimple_code (gsi_stmt (gsi)))
1302 case GIMPLE_LABEL:
1303 case GIMPLE_ASSIGN:
1304 case GIMPLE_CALL:
1305 case GIMPLE_DEBUG:
1306 case GIMPLE_COND:
1307 break;
1308 default:
1309 return false;
1313 if (flag_tree_loop_if_convert_stores)
1315 data_reference_p dr;
1317 for (i = 0; refs->iterate (i, &dr); i++)
1319 dr->aux = XNEW (struct ifc_dr);
1320 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1321 DR_RW_UNCONDITIONALLY (dr) = -1;
1323 predicate_bbs (loop);
1326 for (i = 0; i < loop->num_nodes; i++)
1328 basic_block bb = ifc_bbs[i];
1329 gimple_stmt_iterator itr;
1331 /* Check the if-convertibility of statements in predicated BBs. */
1332 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1333 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1334 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1335 any_mask_load_store))
1336 return false;
1339 if (flag_tree_loop_if_convert_stores)
1340 for (i = 0; i < loop->num_nodes; i++)
1341 free_bb_predicate (ifc_bbs[i]);
1343 /* Checking PHIs needs to be done after stmts, as the fact whether there
1344 are any masked loads or stores affects the tests. */
1345 for (i = 0; i < loop->num_nodes; i++)
1347 basic_block bb = ifc_bbs[i];
1348 gphi_iterator itr;
1350 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1351 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1352 *any_mask_load_store))
1353 return false;
1356 if (dump_file)
1357 fprintf (dump_file, "Applying if-conversion\n");
1359 return true;
1362 /* Return true when LOOP is if-convertible.
1363 LOOP is if-convertible if:
1364 - it is innermost,
1365 - it has two or more basic blocks,
1366 - it has only one exit,
1367 - loop header is not the exit edge,
1368 - if its basic blocks and phi nodes are if convertible. */
1370 static bool
1371 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1373 edge e;
1374 edge_iterator ei;
1375 bool res = false;
1376 vec<data_reference_p> refs;
1377 vec<ddr_p> ddrs;
1379 /* Handle only innermost loop. */
1380 if (!loop || loop->inner)
1382 if (dump_file && (dump_flags & TDF_DETAILS))
1383 fprintf (dump_file, "not innermost loop\n");
1384 return false;
1387 /* If only one block, no need for if-conversion. */
1388 if (loop->num_nodes <= 2)
1390 if (dump_file && (dump_flags & TDF_DETAILS))
1391 fprintf (dump_file, "less than 2 basic blocks\n");
1392 return false;
1395 /* More than one loop exit is too much to handle. */
1396 if (!single_exit (loop))
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1399 fprintf (dump_file, "multiple exits\n");
1400 return false;
1403 /* If one of the loop header's edge is an exit edge then do not
1404 apply if-conversion. */
1405 FOR_EACH_EDGE (e, ei, loop->header->succs)
1406 if (loop_exit_edge_p (loop, e))
1407 return false;
1409 refs.create (5);
1410 ddrs.create (25);
1411 auto_vec<loop_p, 3> loop_nest;
1412 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1413 any_mask_load_store);
1415 if (flag_tree_loop_if_convert_stores)
1417 data_reference_p dr;
1418 unsigned int i;
1420 for (i = 0; refs.iterate (i, &dr); i++)
1421 free (dr->aux);
1424 free_data_refs (refs);
1425 free_dependence_relations (ddrs);
1426 return res;
1429 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1430 which is in predicated basic block.
1431 In fact, the following PHI pattern is searching:
1432 loop-header:
1433 reduc_1 = PHI <..., reduc_2>
1435 if (...)
1436 reduc_3 = ...
1437 reduc_2 = PHI <reduc_1, reduc_3>
1439 ARG_0 and ARG_1 are correspondent PHI arguments.
1440 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1441 EXTENDED is true if PHI has > 2 arguments. */
1443 static bool
1444 is_cond_scalar_reduction (gimple phi, gimple *reduc, tree arg_0, tree arg_1,
1445 tree *op0, tree *op1, bool extended)
1447 tree lhs, r_op1, r_op2;
1448 gimple stmt;
1449 gimple header_phi = NULL;
1450 enum tree_code reduction_op;
1451 basic_block bb = gimple_bb (phi);
1452 struct loop *loop = bb->loop_father;
1453 edge latch_e = loop_latch_edge (loop);
1454 imm_use_iterator imm_iter;
1455 use_operand_p use_p;
1456 edge e;
1457 edge_iterator ei;
1458 bool result = false;
1459 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1460 return false;
1462 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1464 lhs = arg_1;
1465 header_phi = SSA_NAME_DEF_STMT (arg_0);
1466 stmt = SSA_NAME_DEF_STMT (arg_1);
1468 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1470 lhs = arg_0;
1471 header_phi = SSA_NAME_DEF_STMT (arg_1);
1472 stmt = SSA_NAME_DEF_STMT (arg_0);
1474 else
1475 return false;
1476 if (gimple_bb (header_phi) != loop->header)
1477 return false;
1479 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1480 return false;
1482 if (gimple_code (stmt) != GIMPLE_ASSIGN
1483 || gimple_has_volatile_ops (stmt))
1484 return false;
1486 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1487 return false;
1489 if (!is_predicated (gimple_bb (stmt)))
1490 return false;
1492 /* Check that stmt-block is predecessor of phi-block. */
1493 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1494 if (e->dest == bb)
1496 result = true;
1497 break;
1499 if (!result)
1500 return false;
1502 if (!has_single_use (lhs))
1503 return false;
1505 reduction_op = gimple_assign_rhs_code (stmt);
1506 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1507 return false;
1508 r_op1 = gimple_assign_rhs1 (stmt);
1509 r_op2 = gimple_assign_rhs2 (stmt);
1511 /* Make R_OP1 to hold reduction variable. */
1512 if (r_op2 == PHI_RESULT (header_phi)
1513 && reduction_op == PLUS_EXPR)
1514 std::swap (r_op1, r_op2);
1515 else if (r_op1 != PHI_RESULT (header_phi))
1516 return false;
1518 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1519 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1521 gimple use_stmt = USE_STMT (use_p);
1522 if (is_gimple_debug (use_stmt))
1523 continue;
1524 if (use_stmt == stmt)
1525 continue;
1526 if (gimple_code (use_stmt) != GIMPLE_PHI)
1527 return false;
1530 *op0 = r_op1; *op1 = r_op2;
1531 *reduc = stmt;
1532 return true;
1535 /* Converts conditional scalar reduction into unconditional form, e.g.
1536 bb_4
1537 if (_5 != 0) goto bb_5 else goto bb_6
1538 end_bb_4
1539 bb_5
1540 res_6 = res_13 + 1;
1541 end_bb_5
1542 bb_6
1543 # res_2 = PHI <res_13(4), res_6(5)>
1544 end_bb_6
1546 will be converted into sequence
1547 _ifc__1 = _5 != 0 ? 1 : 0;
1548 res_2 = res_13 + _ifc__1;
1549 Argument SWAP tells that arguments of conditional expression should be
1550 swapped.
1551 Returns rhs of resulting PHI assignment. */
1553 static tree
1554 convert_scalar_cond_reduction (gimple reduc, gimple_stmt_iterator *gsi,
1555 tree cond, tree op0, tree op1, bool swap)
1557 gimple_stmt_iterator stmt_it;
1558 gimple new_assign;
1559 tree rhs;
1560 tree rhs1 = gimple_assign_rhs1 (reduc);
1561 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1562 tree c;
1563 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1565 if (dump_file && (dump_flags & TDF_DETAILS))
1567 fprintf (dump_file, "Found cond scalar reduction.\n");
1568 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1571 /* Build cond expression using COND and constant operand
1572 of reduction rhs. */
1573 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1574 unshare_expr (cond),
1575 swap ? zero : op1,
1576 swap ? op1 : zero);
1578 /* Create assignment stmt and insert it at GSI. */
1579 new_assign = gimple_build_assign (tmp, c);
1580 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1581 /* Build rhs for unconditional increment/decrement. */
1582 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1583 TREE_TYPE (rhs1), op0, tmp);
1585 /* Delete original reduction stmt. */
1586 stmt_it = gsi_for_stmt (reduc);
1587 gsi_remove (&stmt_it, true);
1588 release_defs (reduc);
1589 return rhs;
1592 /* Produce condition for all occurrences of ARG in PHI node. */
1594 static tree
1595 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1596 gimple_stmt_iterator *gsi)
1598 int len;
1599 int i;
1600 tree cond = NULL_TREE;
1601 tree c;
1602 edge e;
1604 len = occur->length ();
1605 gcc_assert (len > 0);
1606 for (i = 0; i < len; i++)
1608 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1609 c = bb_predicate (e->src);
1610 if (is_true_predicate (c))
1611 continue;
1612 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1613 is_gimple_condexpr, NULL_TREE,
1614 true, GSI_SAME_STMT);
1615 if (cond != NULL_TREE)
1617 /* Must build OR expression. */
1618 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1619 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1620 is_gimple_condexpr, NULL_TREE,
1621 true, GSI_SAME_STMT);
1623 else
1624 cond = c;
1626 gcc_assert (cond != NULL_TREE);
1627 return cond;
1630 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1631 This routine can handle PHI nodes with more than two arguments.
1633 For example,
1634 S1: A = PHI <x1(1), x2(5)>
1635 is converted into,
1636 S2: A = cond ? x1 : x2;
1638 The generated code is inserted at GSI that points to the top of
1639 basic block's statement list.
1640 If PHI node has more than two arguments a chain of conditional
1641 expression is produced. */
1644 static void
1645 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1647 gimple new_stmt = NULL, reduc;
1648 tree rhs, res, arg0, arg1, op0, op1, scev;
1649 tree cond;
1650 unsigned int index0;
1651 unsigned int max, args_len;
1652 edge e;
1653 basic_block bb;
1654 unsigned int i;
1656 res = gimple_phi_result (phi);
1657 if (virtual_operand_p (res))
1658 return;
1660 if ((rhs = degenerate_phi_result (phi))
1661 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1662 res))
1663 && !chrec_contains_undetermined (scev)
1664 && scev != res
1665 && (rhs = gimple_phi_arg_def (phi, 0))))
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1669 fprintf (dump_file, "Degenerate phi!\n");
1670 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1672 new_stmt = gimple_build_assign (res, rhs);
1673 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1674 update_stmt (new_stmt);
1675 return;
1678 bb = gimple_bb (phi);
1679 if (EDGE_COUNT (bb->preds) == 2)
1681 /* Predicate ordinary PHI node with 2 arguments. */
1682 edge first_edge, second_edge;
1683 basic_block true_bb;
1684 first_edge = EDGE_PRED (bb, 0);
1685 second_edge = EDGE_PRED (bb, 1);
1686 cond = bb_predicate (first_edge->src);
1687 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1688 std::swap (first_edge, second_edge);
1689 if (EDGE_COUNT (first_edge->src->succs) > 1)
1691 cond = bb_predicate (second_edge->src);
1692 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1693 cond = TREE_OPERAND (cond, 0);
1694 else
1695 first_edge = second_edge;
1697 else
1698 cond = bb_predicate (first_edge->src);
1699 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1700 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1701 is_gimple_condexpr, NULL_TREE,
1702 true, GSI_SAME_STMT);
1703 true_bb = first_edge->src;
1704 if (EDGE_PRED (bb, 1)->src == true_bb)
1706 arg0 = gimple_phi_arg_def (phi, 1);
1707 arg1 = gimple_phi_arg_def (phi, 0);
1709 else
1711 arg0 = gimple_phi_arg_def (phi, 0);
1712 arg1 = gimple_phi_arg_def (phi, 1);
1714 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1715 &op0, &op1, false))
1716 /* Convert reduction stmt into vectorizable form. */
1717 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1718 true_bb != gimple_bb (reduc));
1719 else
1720 /* Build new RHS using selected condition and arguments. */
1721 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1722 arg0, arg1);
1723 new_stmt = gimple_build_assign (res, rhs);
1724 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1725 update_stmt (new_stmt);
1727 if (dump_file && (dump_flags & TDF_DETAILS))
1729 fprintf (dump_file, "new phi replacement stmt\n");
1730 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1732 return;
1735 /* Create hashmap for PHI node which contain vector of argument indexes
1736 having the same value. */
1737 bool swap = false;
1738 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1739 unsigned int num_args = gimple_phi_num_args (phi);
1740 int max_ind = -1;
1741 /* Vector of different PHI argument values. */
1742 auto_vec<tree> args (num_args);
1744 /* Compute phi_arg_map. */
1745 for (i = 0; i < num_args; i++)
1747 tree arg;
1749 arg = gimple_phi_arg_def (phi, i);
1750 if (!phi_arg_map.get (arg))
1751 args.quick_push (arg);
1752 phi_arg_map.get_or_insert (arg).safe_push (i);
1755 /* Determine element with max number of occurrences. */
1756 max_ind = -1;
1757 max = 1;
1758 args_len = args.length ();
1759 for (i = 0; i < args_len; i++)
1761 unsigned int len;
1762 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1764 max_ind = (int) i;
1765 max = len;
1769 /* Put element with max number of occurences to the end of ARGS. */
1770 if (max_ind != -1 && max_ind +1 != (int) args_len)
1771 std::swap (args[args_len - 1], args[max_ind]);
1773 /* Handle one special case when number of arguments with different values
1774 is equal 2 and one argument has the only occurrence. Such PHI can be
1775 handled as if would have only 2 arguments. */
1776 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1778 vec<int> *indexes;
1779 indexes = phi_arg_map.get (args[0]);
1780 index0 = (*indexes)[0];
1781 arg0 = args[0];
1782 arg1 = args[1];
1783 e = gimple_phi_arg_edge (phi, index0);
1784 cond = bb_predicate (e->src);
1785 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1787 swap = true;
1788 cond = TREE_OPERAND (cond, 0);
1790 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1791 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1792 is_gimple_condexpr, NULL_TREE,
1793 true, GSI_SAME_STMT);
1794 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1795 &op0, &op1, true)))
1796 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1797 swap? arg1 : arg0,
1798 swap? arg0 : arg1);
1799 else
1800 /* Convert reduction stmt into vectorizable form. */
1801 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1802 swap);
1803 new_stmt = gimple_build_assign (res, rhs);
1804 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1805 update_stmt (new_stmt);
1807 else
1809 /* Common case. */
1810 vec<int> *indexes;
1811 tree type = TREE_TYPE (gimple_phi_result (phi));
1812 tree lhs;
1813 arg1 = args[1];
1814 for (i = 0; i < args_len; i++)
1816 arg0 = args[i];
1817 indexes = phi_arg_map.get (args[i]);
1818 if (i != args_len - 1)
1819 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1820 else
1821 lhs = res;
1822 cond = gen_phi_arg_condition (phi, indexes, gsi);
1823 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1824 arg0, arg1);
1825 new_stmt = gimple_build_assign (lhs, rhs);
1826 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1827 update_stmt (new_stmt);
1828 arg1 = lhs;
1832 if (dump_file && (dump_flags & TDF_DETAILS))
1834 fprintf (dump_file, "new extended phi replacement stmt\n");
1835 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1839 /* Replaces in LOOP all the scalar phi nodes other than those in the
1840 LOOP->header block with conditional modify expressions. */
1842 static void
1843 predicate_all_scalar_phis (struct loop *loop)
1845 basic_block bb;
1846 unsigned int orig_loop_num_nodes = loop->num_nodes;
1847 unsigned int i;
1849 for (i = 1; i < orig_loop_num_nodes; i++)
1851 gphi *phi;
1852 gimple_stmt_iterator gsi;
1853 gphi_iterator phi_gsi;
1854 bb = ifc_bbs[i];
1856 if (bb == loop->header)
1857 continue;
1859 if (EDGE_COUNT (bb->preds) == 1)
1860 continue;
1862 phi_gsi = gsi_start_phis (bb);
1863 if (gsi_end_p (phi_gsi))
1864 continue;
1866 gsi = gsi_after_labels (bb);
1867 while (!gsi_end_p (phi_gsi))
1869 phi = phi_gsi.phi ();
1870 predicate_scalar_phi (phi, &gsi);
1871 release_phi_node (phi);
1872 gsi_next (&phi_gsi);
1875 set_phi_nodes (bb, NULL);
1879 /* Insert in each basic block of LOOP the statements produced by the
1880 gimplification of the predicates. */
1882 static void
1883 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1885 unsigned int i;
1887 for (i = 0; i < loop->num_nodes; i++)
1889 basic_block bb = ifc_bbs[i];
1890 gimple_seq stmts;
1891 if (!is_predicated (bb))
1892 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
1893 if (!is_predicated (bb))
1895 /* Do not insert statements for a basic block that is not
1896 predicated. Also make sure that the predicate of the
1897 basic block is set to true. */
1898 reset_bb_predicate (bb);
1899 continue;
1902 stmts = bb_predicate_gimplified_stmts (bb);
1903 if (stmts)
1905 if (flag_tree_loop_if_convert_stores
1906 || any_mask_load_store)
1908 /* Insert the predicate of the BB just after the label,
1909 as the if-conversion of memory writes will use this
1910 predicate. */
1911 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1912 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1914 else
1916 /* Insert the predicate of the BB at the end of the BB
1917 as this would reduce the register pressure: the only
1918 use of this predicate will be in successor BBs. */
1919 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1921 if (gsi_end_p (gsi)
1922 || stmt_ends_bb_p (gsi_stmt (gsi)))
1923 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1924 else
1925 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1928 /* Once the sequence is code generated, set it to NULL. */
1929 set_bb_predicate_gimplified_stmts (bb, NULL);
1934 /* Helper function for predicate_mem_writes. Returns index of existent
1935 mask if it was created for given SIZE and -1 otherwise. */
1937 static int
1938 mask_exists (int size, vec<int> vec)
1940 unsigned int ix;
1941 int v;
1942 FOR_EACH_VEC_ELT (vec, ix, v)
1943 if (v == size)
1944 return (int) ix;
1945 return -1;
1948 /* Predicate each write to memory in LOOP.
1950 This function transforms control flow constructs containing memory
1951 writes of the form:
1953 | for (i = 0; i < N; i++)
1954 | if (cond)
1955 | A[i] = expr;
1957 into the following form that does not contain control flow:
1959 | for (i = 0; i < N; i++)
1960 | A[i] = cond ? expr : A[i];
1962 The original CFG looks like this:
1964 | bb_0
1965 | i = 0
1966 | end_bb_0
1968 | bb_1
1969 | if (i < N) goto bb_5 else goto bb_2
1970 | end_bb_1
1972 | bb_2
1973 | cond = some_computation;
1974 | if (cond) goto bb_3 else goto bb_4
1975 | end_bb_2
1977 | bb_3
1978 | A[i] = expr;
1979 | goto bb_4
1980 | end_bb_3
1982 | bb_4
1983 | goto bb_1
1984 | end_bb_4
1986 insert_gimplified_predicates inserts the computation of the COND
1987 expression at the beginning of the destination basic block:
1989 | bb_0
1990 | i = 0
1991 | end_bb_0
1993 | bb_1
1994 | if (i < N) goto bb_5 else goto bb_2
1995 | end_bb_1
1997 | bb_2
1998 | cond = some_computation;
1999 | if (cond) goto bb_3 else goto bb_4
2000 | end_bb_2
2002 | bb_3
2003 | cond = some_computation;
2004 | A[i] = expr;
2005 | goto bb_4
2006 | end_bb_3
2008 | bb_4
2009 | goto bb_1
2010 | end_bb_4
2012 predicate_mem_writes is then predicating the memory write as follows:
2014 | bb_0
2015 | i = 0
2016 | end_bb_0
2018 | bb_1
2019 | if (i < N) goto bb_5 else goto bb_2
2020 | end_bb_1
2022 | bb_2
2023 | if (cond) goto bb_3 else goto bb_4
2024 | end_bb_2
2026 | bb_3
2027 | cond = some_computation;
2028 | A[i] = cond ? expr : A[i];
2029 | goto bb_4
2030 | end_bb_3
2032 | bb_4
2033 | goto bb_1
2034 | end_bb_4
2036 and finally combine_blocks removes the basic block boundaries making
2037 the loop vectorizable:
2039 | bb_0
2040 | i = 0
2041 | if (i < N) goto bb_5 else goto bb_1
2042 | end_bb_0
2044 | bb_1
2045 | cond = some_computation;
2046 | A[i] = cond ? expr : A[i];
2047 | if (i < N) goto bb_5 else goto bb_4
2048 | end_bb_1
2050 | bb_4
2051 | goto bb_1
2052 | end_bb_4
2055 static void
2056 predicate_mem_writes (loop_p loop)
2058 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2059 auto_vec<int, 1> vect_sizes;
2060 auto_vec<tree, 1> vect_masks;
2062 for (i = 1; i < orig_loop_num_nodes; i++)
2064 gimple_stmt_iterator gsi;
2065 basic_block bb = ifc_bbs[i];
2066 tree cond = bb_predicate (bb);
2067 bool swap;
2068 gimple stmt;
2069 int index;
2071 if (is_true_predicate (cond))
2072 continue;
2074 swap = false;
2075 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2077 swap = true;
2078 cond = TREE_OPERAND (cond, 0);
2081 vect_sizes.truncate (0);
2082 vect_masks.truncate (0);
2084 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2085 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2086 continue;
2087 else if (gimple_plf (stmt, GF_PLF_2))
2089 tree lhs = gimple_assign_lhs (stmt);
2090 tree rhs = gimple_assign_rhs1 (stmt);
2091 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
2092 gimple new_stmt;
2093 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
2094 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2095 mark_addressable (ref);
2096 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2097 true, NULL_TREE, true,
2098 GSI_SAME_STMT);
2099 if (!vect_sizes.is_empty ()
2100 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2101 /* Use created mask. */
2102 mask = vect_masks[index];
2103 else
2105 masktype = build_nonstandard_integer_type (bitsize, 1);
2106 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
2107 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
2108 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2109 is_gimple_condexpr,
2110 NULL_TREE,
2111 true, GSI_SAME_STMT);
2112 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
2113 mask_op0, mask_op1);
2114 mask = ifc_temp_var (masktype, mask, &gsi);
2115 /* Save mask and its size for further use. */
2116 vect_sizes.safe_push (bitsize);
2117 vect_masks.safe_push (mask);
2119 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
2120 /* Copy points-to info if possible. */
2121 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2122 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2123 ref);
2124 if (TREE_CODE (lhs) == SSA_NAME)
2126 new_stmt
2127 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2128 ptr, mask);
2129 gimple_call_set_lhs (new_stmt, lhs);
2131 else
2132 new_stmt
2133 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2134 mask, rhs);
2135 gsi_replace (&gsi, new_stmt, true);
2137 else if (gimple_vdef (stmt))
2139 tree lhs = gimple_assign_lhs (stmt);
2140 tree rhs = gimple_assign_rhs1 (stmt);
2141 tree type = TREE_TYPE (lhs);
2143 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2144 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2145 if (swap)
2146 std::swap (lhs, rhs);
2147 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2148 is_gimple_condexpr, NULL_TREE,
2149 true, GSI_SAME_STMT);
2150 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2151 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2152 update_stmt (stmt);
2157 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2158 other than the exit and latch of the LOOP. Also resets the
2159 GIMPLE_DEBUG information. */
2161 static void
2162 remove_conditions_and_labels (loop_p loop)
2164 gimple_stmt_iterator gsi;
2165 unsigned int i;
2167 for (i = 0; i < loop->num_nodes; i++)
2169 basic_block bb = ifc_bbs[i];
2171 if (bb_with_exit_edge_p (loop, bb)
2172 || bb == loop->latch)
2173 continue;
2175 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2176 switch (gimple_code (gsi_stmt (gsi)))
2178 case GIMPLE_COND:
2179 case GIMPLE_LABEL:
2180 gsi_remove (&gsi, true);
2181 break;
2183 case GIMPLE_DEBUG:
2184 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2185 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2187 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2188 update_stmt (gsi_stmt (gsi));
2190 gsi_next (&gsi);
2191 break;
2193 default:
2194 gsi_next (&gsi);
2199 /* Combine all the basic blocks from LOOP into one or two super basic
2200 blocks. Replace PHI nodes with conditional modify expressions. */
2202 static void
2203 combine_blocks (struct loop *loop, bool any_mask_load_store)
2205 basic_block bb, exit_bb, merge_target_bb;
2206 unsigned int orig_loop_num_nodes = loop->num_nodes;
2207 unsigned int i;
2208 edge e;
2209 edge_iterator ei;
2211 predicate_bbs (loop);
2212 remove_conditions_and_labels (loop);
2213 insert_gimplified_predicates (loop, any_mask_load_store);
2214 predicate_all_scalar_phis (loop);
2216 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2217 predicate_mem_writes (loop);
2219 /* Merge basic blocks: first remove all the edges in the loop,
2220 except for those from the exit block. */
2221 exit_bb = NULL;
2222 for (i = 0; i < orig_loop_num_nodes; i++)
2224 bb = ifc_bbs[i];
2225 free_bb_predicate (bb);
2226 if (bb_with_exit_edge_p (loop, bb))
2228 gcc_assert (exit_bb == NULL);
2229 exit_bb = bb;
2232 gcc_assert (exit_bb != loop->latch);
2234 for (i = 1; i < orig_loop_num_nodes; i++)
2236 bb = ifc_bbs[i];
2238 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2240 if (e->src == exit_bb)
2241 ei_next (&ei);
2242 else
2243 remove_edge (e);
2247 if (exit_bb != NULL)
2249 if (exit_bb != loop->header)
2251 /* Connect this node to loop header. */
2252 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2253 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2256 /* Redirect non-exit edges to loop->latch. */
2257 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2259 if (!loop_exit_edge_p (loop, e))
2260 redirect_edge_and_branch (e, loop->latch);
2262 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2264 else
2266 /* If the loop does not have an exit, reconnect header and latch. */
2267 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2268 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2271 merge_target_bb = loop->header;
2272 for (i = 1; i < orig_loop_num_nodes; i++)
2274 gimple_stmt_iterator gsi;
2275 gimple_stmt_iterator last;
2277 bb = ifc_bbs[i];
2279 if (bb == exit_bb || bb == loop->latch)
2280 continue;
2282 /* Make stmts member of loop->header. */
2283 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2284 gimple_set_bb (gsi_stmt (gsi), merge_target_bb);
2286 /* Update stmt list. */
2287 last = gsi_last_bb (merge_target_bb);
2288 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2289 set_bb_seq (bb, NULL);
2291 delete_basic_block (bb);
2294 /* If possible, merge loop header to the block with the exit edge.
2295 This reduces the number of basic blocks to two, to please the
2296 vectorizer that handles only loops with two nodes. */
2297 if (exit_bb
2298 && exit_bb != loop->header
2299 && can_merge_blocks_p (loop->header, exit_bb))
2300 merge_blocks (loop->header, exit_bb);
2302 free (ifc_bbs);
2303 ifc_bbs = NULL;
2306 /* Version LOOP before if-converting it, the original loop
2307 will be then if-converted, the new copy of the loop will not,
2308 and the LOOP_VECTORIZED internal call will be guarding which
2309 loop to execute. The vectorizer pass will fold this
2310 internal call into either true or false. */
2312 static bool
2313 version_loop_for_if_conversion (struct loop *loop)
2315 basic_block cond_bb;
2316 tree cond = make_ssa_name (boolean_type_node);
2317 struct loop *new_loop;
2318 gimple g;
2319 gimple_stmt_iterator gsi;
2321 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2322 build_int_cst (integer_type_node, loop->num),
2323 integer_zero_node);
2324 gimple_call_set_lhs (g, cond);
2326 initialize_original_copy_tables ();
2327 new_loop = loop_version (loop, cond, &cond_bb,
2328 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2329 REG_BR_PROB_BASE, true);
2330 free_original_copy_tables ();
2331 if (new_loop == NULL)
2332 return false;
2333 new_loop->dont_vectorize = true;
2334 new_loop->force_vectorize = false;
2335 gsi = gsi_last_bb (cond_bb);
2336 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2337 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2338 update_ssa (TODO_update_ssa);
2339 return true;
2342 /* Performs splitting of critical edges if aggressive_if_conv is true.
2343 Returns false if loop won't be if converted and true otherwise. */
2345 static bool
2346 ifcvt_split_critical_edges (struct loop *loop)
2348 basic_block *body;
2349 basic_block bb;
2350 unsigned int num = loop->num_nodes;
2351 unsigned int i;
2352 gimple stmt;
2353 edge e;
2354 edge_iterator ei;
2356 if (num <= 2)
2357 return false;
2358 if (loop->inner)
2359 return false;
2360 if (!single_exit (loop))
2361 return false;
2363 body = get_loop_body (loop);
2364 for (i = 0; i < num; i++)
2366 bb = body[i];
2367 if (bb == loop->latch
2368 || bb_with_exit_edge_p (loop, bb))
2369 continue;
2370 stmt = last_stmt (bb);
2371 /* Skip basic blocks not ending with conditional branch. */
2372 if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
2373 continue;
2374 FOR_EACH_EDGE (e, ei, bb->succs)
2375 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2376 split_edge (e);
2378 free (body);
2379 return true;
2382 /* Assumes that lhs of DEF_STMT have multiple uses.
2383 Delete one use by (1) creation of copy DEF_STMT with
2384 unique lhs; (2) change original use of lhs in one
2385 use statement with newly created lhs. */
2387 static void
2388 ifcvt_split_def_stmt (gimple def_stmt, gimple use_stmt)
2390 tree var;
2391 tree lhs;
2392 gimple copy_stmt;
2393 gimple_stmt_iterator gsi;
2394 use_operand_p use_p;
2395 imm_use_iterator imm_iter;
2397 var = gimple_assign_lhs (def_stmt);
2398 copy_stmt = gimple_copy (def_stmt);
2399 lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
2400 gimple_assign_set_lhs (copy_stmt, lhs);
2401 SSA_NAME_DEF_STMT (lhs) = copy_stmt;
2402 /* Insert copy of DEF_STMT. */
2403 gsi = gsi_for_stmt (def_stmt);
2404 gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
2405 /* Change use of var to lhs in use_stmt. */
2406 if (dump_file && (dump_flags & TDF_DETAILS))
2408 fprintf (dump_file, "Change use of var ");
2409 print_generic_expr (dump_file, var, TDF_SLIM);
2410 fprintf (dump_file, " to ");
2411 print_generic_expr (dump_file, lhs, TDF_SLIM);
2412 fprintf (dump_file, "\n");
2414 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2416 if (USE_STMT (use_p) != use_stmt)
2417 continue;
2418 SET_USE (use_p, lhs);
2419 break;
2423 /* Traverse bool pattern recursively starting from VAR.
2424 Save its def and use statements to defuse_list if VAR does
2425 not have single use. */
2427 static void
2428 ifcvt_walk_pattern_tree (tree var, vec<gimple> *defuse_list,
2429 gimple use_stmt)
2431 tree rhs1, rhs2;
2432 enum tree_code code;
2433 gimple def_stmt;
2435 def_stmt = SSA_NAME_DEF_STMT (var);
2436 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2437 return;
2438 if (!has_single_use (var))
2440 /* Put def and use stmts into defuse_list. */
2441 defuse_list->safe_push (def_stmt);
2442 defuse_list->safe_push (use_stmt);
2443 if (dump_file && (dump_flags & TDF_DETAILS))
2445 fprintf (dump_file, "Multiple lhs uses in stmt\n");
2446 print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
2449 rhs1 = gimple_assign_rhs1 (def_stmt);
2450 code = gimple_assign_rhs_code (def_stmt);
2451 switch (code)
2453 case SSA_NAME:
2454 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2455 break;
2456 CASE_CONVERT:
2457 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2458 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2459 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2460 break;
2461 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2462 break;
2463 case BIT_NOT_EXPR:
2464 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2465 break;
2466 case BIT_AND_EXPR:
2467 case BIT_IOR_EXPR:
2468 case BIT_XOR_EXPR:
2469 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2470 rhs2 = gimple_assign_rhs2 (def_stmt);
2471 ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
2472 break;
2473 default:
2474 break;
2476 return;
2479 /* Returns true if STMT can be a root of bool pattern apllied
2480 by vectorizer. */
2482 static bool
2483 stmt_is_root_of_bool_pattern (gimple stmt)
2485 enum tree_code code;
2486 tree lhs, rhs;
2488 code = gimple_assign_rhs_code (stmt);
2489 if (CONVERT_EXPR_CODE_P (code))
2491 lhs = gimple_assign_lhs (stmt);
2492 rhs = gimple_assign_rhs1 (stmt);
2493 if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2494 return false;
2495 if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
2496 return false;
2497 return true;
2499 else if (code == COND_EXPR)
2501 rhs = gimple_assign_rhs1 (stmt);
2502 if (TREE_CODE (rhs) != SSA_NAME)
2503 return false;
2504 return true;
2506 return false;
2509 /* Traverse all statements in BB which correspondent to loop header to
2510 find out all statements which can start bool pattern applied by
2511 vectorizer and convert multiple uses in it to conform pattern
2512 restrictions. Such case can occur if the same predicate is used both
2513 for phi node conversion and load/store mask. */
2515 static void
2516 ifcvt_repair_bool_pattern (basic_block bb)
2518 tree rhs;
2519 gimple stmt;
2520 gimple_stmt_iterator gsi;
2521 vec<gimple> defuse_list = vNULL;
2522 vec<gimple> pattern_roots = vNULL;
2523 bool repeat = true;
2524 int niter = 0;
2525 unsigned int ix;
2527 /* Collect all root pattern statements. */
2528 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2530 stmt = gsi_stmt (gsi);
2531 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2532 continue;
2533 if (!stmt_is_root_of_bool_pattern (stmt))
2534 continue;
2535 pattern_roots.safe_push (stmt);
2538 if (pattern_roots.is_empty ())
2539 return;
2541 /* Split all statements with multiple uses iteratively since splitting
2542 may create new multiple uses. */
2543 while (repeat)
2545 repeat = false;
2546 niter++;
2547 FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
2549 rhs = gimple_assign_rhs1 (stmt);
2550 ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
2551 while (defuse_list.length () > 0)
2553 repeat = true;
2554 gimple def_stmt, use_stmt;
2555 use_stmt = defuse_list.pop ();
2556 def_stmt = defuse_list.pop ();
2557 ifcvt_split_def_stmt (def_stmt, use_stmt);
2562 if (dump_file && (dump_flags & TDF_DETAILS))
2563 fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
2564 niter);
2567 /* Delete redundant statements produced by predication which prevents
2568 loop vectorization. */
2570 static void
2571 ifcvt_local_dce (basic_block bb)
2573 gimple stmt;
2574 gimple stmt1;
2575 gimple phi;
2576 gimple_stmt_iterator gsi;
2577 vec<gimple> worklist;
2578 enum gimple_code code;
2579 use_operand_p use_p;
2580 imm_use_iterator imm_iter;
2582 worklist.create (64);
2583 /* Consider all phi as live statements. */
2584 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2586 phi = gsi_stmt (gsi);
2587 gimple_set_plf (phi, GF_PLF_2, true);
2588 worklist.safe_push (phi);
2590 /* Consider load/store statemnts, CALL and COND as live. */
2591 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2593 stmt = gsi_stmt (gsi);
2594 if (gimple_store_p (stmt)
2595 || gimple_assign_load_p (stmt)
2596 || is_gimple_debug (stmt))
2598 gimple_set_plf (stmt, GF_PLF_2, true);
2599 worklist.safe_push (stmt);
2600 continue;
2602 code = gimple_code (stmt);
2603 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2605 gimple_set_plf (stmt, GF_PLF_2, true);
2606 worklist.safe_push (stmt);
2607 continue;
2609 gimple_set_plf (stmt, GF_PLF_2, false);
2611 if (code == GIMPLE_ASSIGN)
2613 tree lhs = gimple_assign_lhs (stmt);
2614 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2616 stmt1 = USE_STMT (use_p);
2617 if (gimple_bb (stmt1) != bb)
2619 gimple_set_plf (stmt, GF_PLF_2, true);
2620 worklist.safe_push (stmt);
2621 break;
2626 /* Propagate liveness through arguments of live stmt. */
2627 while (worklist.length () > 0)
2629 ssa_op_iter iter;
2630 use_operand_p use_p;
2631 tree use;
2633 stmt = worklist.pop ();
2634 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2636 use = USE_FROM_PTR (use_p);
2637 if (TREE_CODE (use) != SSA_NAME)
2638 continue;
2639 stmt1 = SSA_NAME_DEF_STMT (use);
2640 if (gimple_bb (stmt1) != bb
2641 || gimple_plf (stmt1, GF_PLF_2))
2642 continue;
2643 gimple_set_plf (stmt1, GF_PLF_2, true);
2644 worklist.safe_push (stmt1);
2647 /* Delete dead statements. */
2648 gsi = gsi_start_bb (bb);
2649 while (!gsi_end_p (gsi))
2651 stmt = gsi_stmt (gsi);
2652 if (gimple_plf (stmt, GF_PLF_2))
2654 gsi_next (&gsi);
2655 continue;
2657 if (dump_file && (dump_flags & TDF_DETAILS))
2659 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2660 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2662 gsi_remove (&gsi, true);
2663 release_defs (stmt);
2667 /* If-convert LOOP when it is legal. For the moment this pass has no
2668 profitability analysis. Returns non-zero todo flags when something
2669 changed. */
2671 static unsigned int
2672 tree_if_conversion (struct loop *loop)
2674 unsigned int todo = 0;
2675 ifc_bbs = NULL;
2676 bool any_mask_load_store = false;
2678 /* Set-up aggressive if-conversion for loops marked with simd pragma. */
2679 aggressive_if_conv = loop->force_vectorize;
2680 /* Check either outer loop was marked with simd pragma. */
2681 if (!aggressive_if_conv)
2683 struct loop *outer_loop = loop_outer (loop);
2684 if (outer_loop && outer_loop->force_vectorize)
2685 aggressive_if_conv = true;
2688 if (aggressive_if_conv)
2689 if (!ifcvt_split_critical_edges (loop))
2690 goto cleanup;
2692 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2693 || !dbg_cnt (if_conversion_tree))
2694 goto cleanup;
2696 if (any_mask_load_store
2697 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2698 || loop->dont_vectorize))
2699 goto cleanup;
2701 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2702 goto cleanup;
2704 /* Now all statements are if-convertible. Combine all the basic
2705 blocks into one huge basic block doing the if-conversion
2706 on-the-fly. */
2707 combine_blocks (loop, any_mask_load_store);
2709 /* Delete dead predicate computations and repair tree correspondent
2710 to bool pattern to delete multiple uses of preidcates. */
2711 if (aggressive_if_conv)
2713 ifcvt_local_dce (loop->header);
2714 ifcvt_repair_bool_pattern (loop->header);
2717 todo |= TODO_cleanup_cfg;
2718 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2720 mark_virtual_operands_for_renaming (cfun);
2721 todo |= TODO_update_ssa_only_virtuals;
2724 cleanup:
2725 if (ifc_bbs)
2727 unsigned int i;
2729 for (i = 0; i < loop->num_nodes; i++)
2730 free_bb_predicate (ifc_bbs[i]);
2732 free (ifc_bbs);
2733 ifc_bbs = NULL;
2735 free_dominance_info (CDI_POST_DOMINATORS);
2737 return todo;
2740 /* Tree if-conversion pass management. */
2742 namespace {
2744 const pass_data pass_data_if_conversion =
2746 GIMPLE_PASS, /* type */
2747 "ifcvt", /* name */
2748 OPTGROUP_NONE, /* optinfo_flags */
2749 TV_NONE, /* tv_id */
2750 ( PROP_cfg | PROP_ssa ), /* properties_required */
2751 0, /* properties_provided */
2752 0, /* properties_destroyed */
2753 0, /* todo_flags_start */
2754 0, /* todo_flags_finish */
2757 class pass_if_conversion : public gimple_opt_pass
2759 public:
2760 pass_if_conversion (gcc::context *ctxt)
2761 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2764 /* opt_pass methods: */
2765 virtual bool gate (function *);
2766 virtual unsigned int execute (function *);
2768 }; // class pass_if_conversion
2770 bool
2771 pass_if_conversion::gate (function *fun)
2773 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2774 && flag_tree_loop_if_convert != 0)
2775 || flag_tree_loop_if_convert == 1
2776 || flag_tree_loop_if_convert_stores == 1);
2779 unsigned int
2780 pass_if_conversion::execute (function *fun)
2782 struct loop *loop;
2783 unsigned todo = 0;
2785 if (number_of_loops (fun) <= 1)
2786 return 0;
2788 FOR_EACH_LOOP (loop, 0)
2789 if (flag_tree_loop_if_convert == 1
2790 || flag_tree_loop_if_convert_stores == 1
2791 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2792 && !loop->dont_vectorize))
2793 todo |= tree_if_conversion (loop);
2795 #ifdef ENABLE_CHECKING
2797 basic_block bb;
2798 FOR_EACH_BB_FN (bb, fun)
2799 gcc_assert (!bb->aux);
2801 #endif
2803 return todo;
2806 } // anon namespace
2808 gimple_opt_pass *
2809 make_pass_if_conversion (gcc::context *ctxt)
2811 return new pass_if_conversion (ctxt);