gcc/
[official-gcc.git] / gcc / tree-if-conv.c
blob59853c8addeadd14d9337cff99e631ebcaa9092e
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "tm.h"
87 #include "alias.h"
88 #include "symtab.h"
89 #include "tree.h"
90 #include "fold-const.h"
91 #include "stor-layout.h"
92 #include "flags.h"
93 #include "predict.h"
94 #include "hard-reg-set.h"
95 #include "function.h"
96 #include "dominance.h"
97 #include "cfg.h"
98 #include "basic-block.h"
99 #include "gimple-pretty-print.h"
100 #include "tree-ssa-alias.h"
101 #include "internal-fn.h"
102 #include "gimple-fold.h"
103 #include "gimple-expr.h"
104 #include "gimple.h"
105 #include "gimplify.h"
106 #include "gimple-iterator.h"
107 #include "gimplify-me.h"
108 #include "gimple-ssa.h"
109 #include "tree-cfg.h"
110 #include "tree-phinodes.h"
111 #include "ssa-iterators.h"
112 #include "stringpool.h"
113 #include "tree-ssanames.h"
114 #include "tree-into-ssa.h"
115 #include "tree-ssa.h"
116 #include "cfgloop.h"
117 #include "tree-chrec.h"
118 #include "tree-data-ref.h"
119 #include "tree-scalar-evolution.h"
120 #include "tree-ssa-loop-ivopts.h"
121 #include "tree-ssa-address.h"
122 #include "tree-pass.h"
123 #include "dbgcnt.h"
124 #include "rtl.h"
125 #include "insn-config.h"
126 #include "expmed.h"
127 #include "dojump.h"
128 #include "explow.h"
129 #include "calls.h"
130 #include "emit-rtl.h"
131 #include "varasm.h"
132 #include "stmt.h"
133 #include "expr.h"
134 #include "insn-codes.h"
135 #include "optabs.h"
136 #include "tree-hash-traits.h"
138 /* List of basic blocks in if-conversion-suitable order. */
139 static basic_block *ifc_bbs;
141 /* Apply more aggressive (extended) if-conversion if true. */
142 static bool aggressive_if_conv;
144 /* Structure used to predicate basic blocks. This is attached to the
145 ->aux field of the BBs in the loop to be if-converted. */
146 typedef struct bb_predicate_s {
148 /* The condition under which this basic block is executed. */
149 tree predicate;
151 /* PREDICATE is gimplified, and the sequence of statements is
152 recorded here, in order to avoid the duplication of computations
153 that occur in previous conditions. See PR44483. */
154 gimple_seq predicate_gimplified_stmts;
155 } *bb_predicate_p;
157 /* Returns true when the basic block BB has a predicate. */
159 static inline bool
160 bb_has_predicate (basic_block bb)
162 return bb->aux != NULL;
165 /* Returns the gimplified predicate for basic block BB. */
167 static inline tree
168 bb_predicate (basic_block bb)
170 return ((bb_predicate_p) bb->aux)->predicate;
173 /* Sets the gimplified predicate COND for basic block BB. */
175 static inline void
176 set_bb_predicate (basic_block bb, tree cond)
178 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
179 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
180 || is_gimple_condexpr (cond));
181 ((bb_predicate_p) bb->aux)->predicate = cond;
184 /* Returns the sequence of statements of the gimplification of the
185 predicate for basic block BB. */
187 static inline gimple_seq
188 bb_predicate_gimplified_stmts (basic_block bb)
190 return ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts;
193 /* Sets the sequence of statements STMTS of the gimplification of the
194 predicate for basic block BB. */
196 static inline void
197 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
199 ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts = stmts;
202 /* Adds the sequence of statements STMTS to the sequence of statements
203 of the predicate for basic block BB. */
205 static inline void
206 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
208 gimple_seq_add_seq
209 (&(((bb_predicate_p) bb->aux)->predicate_gimplified_stmts), stmts);
212 /* Initializes to TRUE the predicate of basic block BB. */
214 static inline void
215 init_bb_predicate (basic_block bb)
217 bb->aux = XNEW (struct bb_predicate_s);
218 set_bb_predicate_gimplified_stmts (bb, NULL);
219 set_bb_predicate (bb, boolean_true_node);
222 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
223 but don't actually free it. */
225 static inline void
226 release_bb_predicate (basic_block bb)
228 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
229 if (stmts)
231 gimple_stmt_iterator i;
233 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
234 free_stmt_operands (cfun, gsi_stmt (i));
235 set_bb_predicate_gimplified_stmts (bb, NULL);
239 /* Free the predicate of basic block BB. */
241 static inline void
242 free_bb_predicate (basic_block bb)
244 if (!bb_has_predicate (bb))
245 return;
247 release_bb_predicate (bb);
248 free (bb->aux);
249 bb->aux = NULL;
252 /* Reinitialize predicate of BB with the true predicate. */
254 static inline void
255 reset_bb_predicate (basic_block bb)
257 if (!bb_has_predicate (bb))
258 init_bb_predicate (bb);
259 else
261 release_bb_predicate (bb);
262 set_bb_predicate (bb, boolean_true_node);
266 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
267 the expression EXPR. Inserts the statement created for this
268 computation before GSI and leaves the iterator GSI at the same
269 statement. */
271 static tree
272 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
274 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
275 gimple stmt = gimple_build_assign (new_name, expr);
276 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
277 return new_name;
280 /* Return true when COND is a true predicate. */
282 static inline bool
283 is_true_predicate (tree cond)
285 return (cond == NULL_TREE
286 || cond == boolean_true_node
287 || integer_onep (cond));
290 /* Returns true when BB has a predicate that is not trivial: true or
291 NULL_TREE. */
293 static inline bool
294 is_predicated (basic_block bb)
296 return !is_true_predicate (bb_predicate (bb));
299 /* Parses the predicate COND and returns its comparison code and
300 operands OP0 and OP1. */
302 static enum tree_code
303 parse_predicate (tree cond, tree *op0, tree *op1)
305 gimple s;
307 if (TREE_CODE (cond) == SSA_NAME
308 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
310 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
312 *op0 = gimple_assign_rhs1 (s);
313 *op1 = gimple_assign_rhs2 (s);
314 return gimple_assign_rhs_code (s);
317 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
319 tree op = gimple_assign_rhs1 (s);
320 tree type = TREE_TYPE (op);
321 enum tree_code code = parse_predicate (op, op0, op1);
323 return code == ERROR_MARK ? ERROR_MARK
324 : invert_tree_comparison (code, HONOR_NANS (type));
327 return ERROR_MARK;
330 if (COMPARISON_CLASS_P (cond))
332 *op0 = TREE_OPERAND (cond, 0);
333 *op1 = TREE_OPERAND (cond, 1);
334 return TREE_CODE (cond);
337 return ERROR_MARK;
340 /* Returns the fold of predicate C1 OR C2 at location LOC. */
342 static tree
343 fold_or_predicates (location_t loc, tree c1, tree c2)
345 tree op1a, op1b, op2a, op2b;
346 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
347 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
349 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
351 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
352 code2, op2a, op2b);
353 if (t)
354 return t;
357 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
360 /* Returns true if N is either a constant or a SSA_NAME. */
362 static bool
363 constant_or_ssa_name (tree n)
365 switch (TREE_CODE (n))
367 case SSA_NAME:
368 case INTEGER_CST:
369 case REAL_CST:
370 case COMPLEX_CST:
371 case VECTOR_CST:
372 return true;
373 default:
374 return false;
378 /* Returns either a COND_EXPR or the folded expression if the folded
379 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
380 a constant or a SSA_NAME. */
382 static tree
383 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
385 tree rhs1, lhs1, cond_expr;
387 /* If COND is comparison r != 0 and r has boolean type, convert COND
388 to SSA_NAME to accept by vect bool pattern. */
389 if (TREE_CODE (cond) == NE_EXPR)
391 tree op0 = TREE_OPERAND (cond, 0);
392 tree op1 = TREE_OPERAND (cond, 1);
393 if (TREE_CODE (op0) == SSA_NAME
394 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
395 && (integer_zerop (op1)))
396 cond = op0;
398 cond_expr = fold_ternary (COND_EXPR, type, cond,
399 rhs, lhs);
401 if (cond_expr == NULL_TREE)
402 return build3 (COND_EXPR, type, cond, rhs, lhs);
404 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
406 if (constant_or_ssa_name (cond_expr))
407 return cond_expr;
409 if (TREE_CODE (cond_expr) == ABS_EXPR)
411 rhs1 = TREE_OPERAND (cond_expr, 1);
412 STRIP_USELESS_TYPE_CONVERSION (rhs1);
413 if (constant_or_ssa_name (rhs1))
414 return build1 (ABS_EXPR, type, rhs1);
417 if (TREE_CODE (cond_expr) == MIN_EXPR
418 || TREE_CODE (cond_expr) == MAX_EXPR)
420 lhs1 = TREE_OPERAND (cond_expr, 0);
421 STRIP_USELESS_TYPE_CONVERSION (lhs1);
422 rhs1 = TREE_OPERAND (cond_expr, 1);
423 STRIP_USELESS_TYPE_CONVERSION (rhs1);
424 if (constant_or_ssa_name (rhs1)
425 && constant_or_ssa_name (lhs1))
426 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
428 return build3 (COND_EXPR, type, cond, rhs, lhs);
431 /* Add condition NC to the predicate list of basic block BB. LOOP is
432 the loop to be if-converted. Use predicate of cd-equivalent block
433 for join bb if it exists: we call basic blocks bb1 and bb2
434 cd-equivalent if they are executed under the same condition. */
436 static inline void
437 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
439 tree bc, *tp;
440 basic_block dom_bb;
442 if (is_true_predicate (nc))
443 return;
445 /* If dominance tells us this basic block is always executed,
446 don't record any predicates for it. */
447 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
448 return;
450 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
451 /* We use notion of cd equivalence to get simpler predicate for
452 join block, e.g. if join block has 2 predecessors with predicates
453 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
454 p1 & p2 | p1 & !p2. */
455 if (dom_bb != loop->header
456 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
458 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
459 bc = bb_predicate (dom_bb);
460 if (!is_true_predicate (bc))
461 set_bb_predicate (bb, bc);
462 else
463 gcc_assert (is_true_predicate (bb_predicate (bb)));
464 if (dump_file && (dump_flags & TDF_DETAILS))
465 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
466 dom_bb->index, bb->index);
467 return;
470 if (!is_predicated (bb))
471 bc = nc;
472 else
474 bc = bb_predicate (bb);
475 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
476 if (is_true_predicate (bc))
478 reset_bb_predicate (bb);
479 return;
483 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
484 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
485 tp = &TREE_OPERAND (bc, 0);
486 else
487 tp = &bc;
488 if (!is_gimple_condexpr (*tp))
490 gimple_seq stmts;
491 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
492 add_bb_predicate_gimplified_stmts (bb, stmts);
494 set_bb_predicate (bb, bc);
497 /* Add the condition COND to the previous condition PREV_COND, and add
498 this to the predicate list of the destination of edge E. LOOP is
499 the loop to be if-converted. */
501 static void
502 add_to_dst_predicate_list (struct loop *loop, edge e,
503 tree prev_cond, tree cond)
505 if (!flow_bb_inside_loop_p (loop, e->dest))
506 return;
508 if (!is_true_predicate (prev_cond))
509 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
510 prev_cond, cond);
512 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
513 add_to_predicate_list (loop, e->dest, cond);
516 /* Return true if one of the successor edges of BB exits LOOP. */
518 static bool
519 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
521 edge e;
522 edge_iterator ei;
524 FOR_EACH_EDGE (e, ei, bb->succs)
525 if (loop_exit_edge_p (loop, e))
526 return true;
528 return false;
531 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
532 and it belongs to basic block BB.
534 PHI is not if-convertible if:
535 - it has more than 2 arguments.
537 When the flag_tree_loop_if_convert_stores is not set, PHI is not
538 if-convertible if:
539 - a virtual PHI is immediately used in another PHI node,
540 - there is a virtual PHI in a BB other than the loop->header.
541 When the aggressive_if_conv is set, PHI can have more than
542 two arguments. */
544 static bool
545 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
546 bool any_mask_load_store)
548 if (dump_file && (dump_flags & TDF_DETAILS))
550 fprintf (dump_file, "-------------------------\n");
551 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
554 if (bb != loop->header)
556 if (gimple_phi_num_args (phi) != 2
557 && !aggressive_if_conv)
559 if (dump_file && (dump_flags & TDF_DETAILS))
560 fprintf (dump_file, "More than two phi node args.\n");
561 return false;
565 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
566 return true;
568 /* When the flag_tree_loop_if_convert_stores is not set, check
569 that there are no memory writes in the branches of the loop to be
570 if-converted. */
571 if (virtual_operand_p (gimple_phi_result (phi)))
573 imm_use_iterator imm_iter;
574 use_operand_p use_p;
576 if (bb != loop->header)
578 if (dump_file && (dump_flags & TDF_DETAILS))
579 fprintf (dump_file, "Virtual phi not on loop->header.\n");
580 return false;
583 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
585 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
586 && USE_STMT (use_p) != (gimple) phi)
588 if (dump_file && (dump_flags & TDF_DETAILS))
589 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
590 return false;
595 return true;
598 /* Records the status of a data reference. This struct is attached to
599 each DR->aux field. */
601 struct ifc_dr {
602 /* -1 when not initialized, 0 when false, 1 when true. */
603 int written_at_least_once;
605 /* -1 when not initialized, 0 when false, 1 when true. */
606 int rw_unconditionally;
609 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
610 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
611 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
613 /* Returns true when the memory references of STMT are read or written
614 unconditionally. In other words, this function returns true when
615 for every data reference A in STMT there exist other accesses to
616 a data reference with the same base with predicates that add up (OR-up) to
617 the true predicate: this ensures that the data reference A is touched
618 (read or written) on every iteration of the if-converted loop. */
620 static bool
621 memrefs_read_or_written_unconditionally (gimple stmt,
622 vec<data_reference_p> drs)
624 int i, j;
625 data_reference_p a, b;
626 tree ca = bb_predicate (gimple_bb (stmt));
628 for (i = 0; drs.iterate (i, &a); i++)
629 if (DR_STMT (a) == stmt)
631 bool found = false;
632 int x = DR_RW_UNCONDITIONALLY (a);
634 if (x == 0)
635 return false;
637 if (x == 1)
638 continue;
640 for (j = 0; drs.iterate (j, &b); j++)
642 tree ref_base_a = DR_REF (a);
643 tree ref_base_b = DR_REF (b);
645 if (DR_STMT (b) == stmt)
646 continue;
648 while (TREE_CODE (ref_base_a) == COMPONENT_REF
649 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
650 || TREE_CODE (ref_base_a) == REALPART_EXPR)
651 ref_base_a = TREE_OPERAND (ref_base_a, 0);
653 while (TREE_CODE (ref_base_b) == COMPONENT_REF
654 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
655 || TREE_CODE (ref_base_b) == REALPART_EXPR)
656 ref_base_b = TREE_OPERAND (ref_base_b, 0);
658 if (!operand_equal_p (ref_base_a, ref_base_b, 0))
660 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
662 if (DR_RW_UNCONDITIONALLY (b) == 1
663 || is_true_predicate (cb)
664 || is_true_predicate (ca
665 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
667 DR_RW_UNCONDITIONALLY (a) = 1;
668 DR_RW_UNCONDITIONALLY (b) = 1;
669 found = true;
670 break;
675 if (!found)
677 DR_RW_UNCONDITIONALLY (a) = 0;
678 return false;
682 return true;
685 /* Returns true when the memory references of STMT are unconditionally
686 written. In other words, this function returns true when for every
687 data reference A written in STMT, there exist other writes to the
688 same data reference with predicates that add up (OR-up) to the true
689 predicate: this ensures that the data reference A is written on
690 every iteration of the if-converted loop. */
692 static bool
693 write_memrefs_written_at_least_once (gimple stmt,
694 vec<data_reference_p> drs)
696 int i, j;
697 data_reference_p a, b;
698 tree ca = bb_predicate (gimple_bb (stmt));
700 for (i = 0; drs.iterate (i, &a); i++)
701 if (DR_STMT (a) == stmt
702 && DR_IS_WRITE (a))
704 bool found = false;
705 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
707 if (x == 0)
708 return false;
710 if (x == 1)
711 continue;
713 for (j = 0; drs.iterate (j, &b); j++)
714 if (DR_STMT (b) != stmt
715 && DR_IS_WRITE (b)
716 && same_data_refs_base_objects (a, b))
718 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
720 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
721 || is_true_predicate (cb)
722 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
723 ca, cb)))
725 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
726 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
727 found = true;
728 break;
732 if (!found)
734 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
735 return false;
739 return true;
742 /* Return true when the memory references of STMT won't trap in the
743 if-converted code. There are two things that we have to check for:
745 - writes to memory occur to writable memory: if-conversion of
746 memory writes transforms the conditional memory writes into
747 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
748 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
749 be executed at all in the original code, it may be a readonly
750 memory. To check that A is not const-qualified, we check that
751 there exists at least an unconditional write to A in the current
752 function.
754 - reads or writes to memory are valid memory accesses for every
755 iteration. To check that the memory accesses are correctly formed
756 and that we are allowed to read and write in these locations, we
757 check that the memory accesses to be if-converted occur at every
758 iteration unconditionally. */
760 static bool
761 ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
763 return write_memrefs_written_at_least_once (stmt, refs)
764 && memrefs_read_or_written_unconditionally (stmt, refs);
767 /* Wrapper around gimple_could_trap_p refined for the needs of the
768 if-conversion. Try to prove that the memory accesses of STMT could
769 not trap in the innermost loop containing STMT. */
771 static bool
772 ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
774 if (gimple_vuse (stmt)
775 && !gimple_could_trap_p_1 (stmt, false, false)
776 && ifcvt_memrefs_wont_trap (stmt, refs))
777 return false;
779 return gimple_could_trap_p (stmt);
782 /* Return true if STMT could be converted into a masked load or store
783 (conditional load or store based on a mask computed from bb predicate). */
785 static bool
786 ifcvt_can_use_mask_load_store (gimple stmt)
788 tree lhs, ref;
789 machine_mode mode;
790 basic_block bb = gimple_bb (stmt);
791 bool is_load;
793 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
794 || bb->loop_father->dont_vectorize
795 || !gimple_assign_single_p (stmt)
796 || gimple_has_volatile_ops (stmt))
797 return false;
799 /* Check whether this is a load or store. */
800 lhs = gimple_assign_lhs (stmt);
801 if (gimple_store_p (stmt))
803 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
804 return false;
805 is_load = false;
806 ref = lhs;
808 else if (gimple_assign_load_p (stmt))
810 is_load = true;
811 ref = gimple_assign_rhs1 (stmt);
813 else
814 return false;
816 if (may_be_nonaddressable_p (ref))
817 return false;
819 /* Mask should be integer mode of the same size as the load/store
820 mode. */
821 mode = TYPE_MODE (TREE_TYPE (lhs));
822 if (int_mode_for_mode (mode) == BLKmode
823 || VECTOR_MODE_P (mode))
824 return false;
826 if (can_vec_mask_load_store_p (mode, is_load))
827 return true;
829 return false;
832 /* Return true when STMT is if-convertible.
834 GIMPLE_ASSIGN statement is not if-convertible if,
835 - it is not movable,
836 - it could trap,
837 - LHS is not var decl. */
839 static bool
840 if_convertible_gimple_assign_stmt_p (gimple stmt,
841 vec<data_reference_p> refs,
842 bool *any_mask_load_store)
844 tree lhs = gimple_assign_lhs (stmt);
845 basic_block bb;
847 if (dump_file && (dump_flags & TDF_DETAILS))
849 fprintf (dump_file, "-------------------------\n");
850 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
853 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
854 return false;
856 /* Some of these constrains might be too conservative. */
857 if (stmt_ends_bb_p (stmt)
858 || gimple_has_volatile_ops (stmt)
859 || (TREE_CODE (lhs) == SSA_NAME
860 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
861 || gimple_has_side_effects (stmt))
863 if (dump_file && (dump_flags & TDF_DETAILS))
864 fprintf (dump_file, "stmt not suitable for ifcvt\n");
865 return false;
868 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
869 in between if_convertible_loop_p and combine_blocks
870 we can perform loop versioning. */
871 gimple_set_plf (stmt, GF_PLF_2, false);
873 if (flag_tree_loop_if_convert_stores)
875 if (ifcvt_could_trap_p (stmt, refs))
877 if (ifcvt_can_use_mask_load_store (stmt))
879 gimple_set_plf (stmt, GF_PLF_2, true);
880 *any_mask_load_store = true;
881 return true;
883 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "tree could trap...\n");
885 return false;
887 return true;
890 if (gimple_assign_rhs_could_trap_p (stmt))
892 if (ifcvt_can_use_mask_load_store (stmt))
894 gimple_set_plf (stmt, GF_PLF_2, true);
895 *any_mask_load_store = true;
896 return true;
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "tree could trap...\n");
900 return false;
903 bb = gimple_bb (stmt);
905 if (TREE_CODE (lhs) != SSA_NAME
906 && bb != bb->loop_father->header
907 && !bb_with_exit_edge_p (bb->loop_father, bb))
909 if (ifcvt_can_use_mask_load_store (stmt))
911 gimple_set_plf (stmt, GF_PLF_2, true);
912 *any_mask_load_store = true;
913 return true;
915 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "LHS is not var\n");
918 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
920 return false;
923 return true;
926 /* Return true when STMT is if-convertible.
928 A statement is if-convertible if:
929 - it is an if-convertible GIMPLE_ASSIGN,
930 - it is a GIMPLE_LABEL or a GIMPLE_COND,
931 - it is builtins call. */
933 static bool
934 if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
935 bool *any_mask_load_store)
937 switch (gimple_code (stmt))
939 case GIMPLE_LABEL:
940 case GIMPLE_DEBUG:
941 case GIMPLE_COND:
942 return true;
944 case GIMPLE_ASSIGN:
945 return if_convertible_gimple_assign_stmt_p (stmt, refs,
946 any_mask_load_store);
948 case GIMPLE_CALL:
950 tree fndecl = gimple_call_fndecl (stmt);
951 if (fndecl)
953 int flags = gimple_call_flags (stmt);
954 if ((flags & ECF_CONST)
955 && !(flags & ECF_LOOPING_CONST_OR_PURE)
956 /* We can only vectorize some builtins at the moment,
957 so restrict if-conversion to those. */
958 && DECL_BUILT_IN (fndecl))
959 return true;
961 return false;
964 default:
965 /* Don't know what to do with 'em so don't do anything. */
966 if (dump_file && (dump_flags & TDF_DETAILS))
968 fprintf (dump_file, "don't know what to do\n");
969 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
971 return false;
972 break;
975 return true;
978 /* Assumes that BB has more than 1 predecessors.
979 Returns false if at least one successor is not on critical edge
980 and true otherwise. */
982 static inline bool
983 all_preds_critical_p (basic_block bb)
985 edge e;
986 edge_iterator ei;
988 FOR_EACH_EDGE (e, ei, bb->preds)
989 if (EDGE_COUNT (e->src->succs) == 1)
990 return false;
991 return true;
994 /* Returns true if at least one successor in on critical edge. */
995 static inline bool
996 has_pred_critical_p (basic_block bb)
998 edge e;
999 edge_iterator ei;
1001 FOR_EACH_EDGE (e, ei, bb->preds)
1002 if (EDGE_COUNT (e->src->succs) > 1)
1003 return true;
1004 return false;
1007 /* Return true when BB is if-convertible. This routine does not check
1008 basic block's statements and phis.
1010 A basic block is not if-convertible if:
1011 - it is non-empty and it is after the exit block (in BFS order),
1012 - it is after the exit block but before the latch,
1013 - its edges are not normal.
1015 Last restriction is valid if aggressive_if_conv is false.
1017 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1018 inside LOOP. */
1020 static bool
1021 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
1023 edge e;
1024 edge_iterator ei;
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1029 if (EDGE_COUNT (bb->succs) > 2)
1030 return false;
1032 if (EDGE_COUNT (bb->preds) > 2
1033 && !aggressive_if_conv)
1034 return false;
1036 if (exit_bb)
1038 if (bb != loop->latch)
1040 if (dump_file && (dump_flags & TDF_DETAILS))
1041 fprintf (dump_file, "basic block after exit bb but before latch\n");
1042 return false;
1044 else if (!empty_block_p (bb))
1046 if (dump_file && (dump_flags & TDF_DETAILS))
1047 fprintf (dump_file, "non empty basic block after exit bb\n");
1048 return false;
1050 else if (bb == loop->latch
1051 && bb != exit_bb
1052 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1054 if (dump_file && (dump_flags & TDF_DETAILS))
1055 fprintf (dump_file, "latch is not dominated by exit_block\n");
1056 return false;
1060 /* Be less adventurous and handle only normal edges. */
1061 FOR_EACH_EDGE (e, ei, bb->succs)
1062 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Difficult to handle edges\n");
1066 return false;
1069 /* At least one incoming edge has to be non-critical as otherwise edge
1070 predicates are not equal to basic-block predicates of the edge
1071 source. This check is skipped if aggressive_if_conv is true. */
1072 if (!aggressive_if_conv
1073 && EDGE_COUNT (bb->preds) > 1
1074 && bb != loop->header
1075 && all_preds_critical_p (bb))
1077 if (dump_file && (dump_flags & TDF_DETAILS))
1078 fprintf (dump_file, "only critical predecessors\n");
1079 return false;
1082 return true;
1085 /* Return true when all predecessor blocks of BB are visited. The
1086 VISITED bitmap keeps track of the visited blocks. */
1088 static bool
1089 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1091 edge e;
1092 edge_iterator ei;
1093 FOR_EACH_EDGE (e, ei, bb->preds)
1094 if (!bitmap_bit_p (*visited, e->src->index))
1095 return false;
1097 return true;
1100 /* Get body of a LOOP in suitable order for if-conversion. It is
1101 caller's responsibility to deallocate basic block list.
1102 If-conversion suitable order is, breadth first sort (BFS) order
1103 with an additional constraint: select a block only if all its
1104 predecessors are already selected. */
1106 static basic_block *
1107 get_loop_body_in_if_conv_order (const struct loop *loop)
1109 basic_block *blocks, *blocks_in_bfs_order;
1110 basic_block bb;
1111 bitmap visited;
1112 unsigned int index = 0;
1113 unsigned int visited_count = 0;
1115 gcc_assert (loop->num_nodes);
1116 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1118 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1119 visited = BITMAP_ALLOC (NULL);
1121 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1123 index = 0;
1124 while (index < loop->num_nodes)
1126 bb = blocks_in_bfs_order [index];
1128 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1130 free (blocks_in_bfs_order);
1131 BITMAP_FREE (visited);
1132 free (blocks);
1133 return NULL;
1136 if (!bitmap_bit_p (visited, bb->index))
1138 if (pred_blocks_visited_p (bb, &visited)
1139 || bb == loop->header)
1141 /* This block is now visited. */
1142 bitmap_set_bit (visited, bb->index);
1143 blocks[visited_count++] = bb;
1147 index++;
1149 if (index == loop->num_nodes
1150 && visited_count != loop->num_nodes)
1151 /* Not done yet. */
1152 index = 0;
1154 free (blocks_in_bfs_order);
1155 BITMAP_FREE (visited);
1156 return blocks;
1159 /* Returns true when the analysis of the predicates for all the basic
1160 blocks in LOOP succeeded.
1162 predicate_bbs first allocates the predicates of the basic blocks.
1163 These fields are then initialized with the tree expressions
1164 representing the predicates under which a basic block is executed
1165 in the LOOP. As the loop->header is executed at each iteration, it
1166 has the "true" predicate. Other statements executed under a
1167 condition are predicated with that condition, for example
1169 | if (x)
1170 | S1;
1171 | else
1172 | S2;
1174 S1 will be predicated with "x", and
1175 S2 will be predicated with "!x". */
1177 static void
1178 predicate_bbs (loop_p loop)
1180 unsigned int i;
1182 for (i = 0; i < loop->num_nodes; i++)
1183 init_bb_predicate (ifc_bbs[i]);
1185 for (i = 0; i < loop->num_nodes; i++)
1187 basic_block bb = ifc_bbs[i];
1188 tree cond;
1189 gimple stmt;
1191 /* The loop latch and loop exit block are always executed and
1192 have no extra conditions to be processed: skip them. */
1193 if (bb == loop->latch
1194 || bb_with_exit_edge_p (loop, bb))
1196 reset_bb_predicate (bb);
1197 continue;
1200 cond = bb_predicate (bb);
1201 stmt = last_stmt (bb);
1202 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1204 tree c2;
1205 edge true_edge, false_edge;
1206 location_t loc = gimple_location (stmt);
1207 tree c = build2_loc (loc, gimple_cond_code (stmt),
1208 boolean_type_node,
1209 gimple_cond_lhs (stmt),
1210 gimple_cond_rhs (stmt));
1212 /* Add new condition into destination's predicate list. */
1213 extract_true_false_edges_from_block (gimple_bb (stmt),
1214 &true_edge, &false_edge);
1216 /* If C is true, then TRUE_EDGE is taken. */
1217 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1218 unshare_expr (c));
1220 /* If C is false, then FALSE_EDGE is taken. */
1221 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1222 unshare_expr (c));
1223 add_to_dst_predicate_list (loop, false_edge,
1224 unshare_expr (cond), c2);
1226 cond = NULL_TREE;
1229 /* If current bb has only one successor, then consider it as an
1230 unconditional goto. */
1231 if (single_succ_p (bb))
1233 basic_block bb_n = single_succ (bb);
1235 /* The successor bb inherits the predicate of its
1236 predecessor. If there is no predicate in the predecessor
1237 bb, then consider the successor bb as always executed. */
1238 if (cond == NULL_TREE)
1239 cond = boolean_true_node;
1241 add_to_predicate_list (loop, bb_n, cond);
1245 /* The loop header is always executed. */
1246 reset_bb_predicate (loop->header);
1247 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1248 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1251 /* Return true when LOOP is if-convertible. This is a helper function
1252 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1253 in if_convertible_loop_p. */
1255 static bool
1256 if_convertible_loop_p_1 (struct loop *loop,
1257 vec<loop_p> *loop_nest,
1258 vec<data_reference_p> *refs,
1259 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1261 bool res;
1262 unsigned int i;
1263 basic_block exit_bb = NULL;
1265 /* Don't if-convert the loop when the data dependences cannot be
1266 computed: the loop won't be vectorized in that case. */
1267 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1268 if (!res)
1269 return false;
1271 calculate_dominance_info (CDI_DOMINATORS);
1272 calculate_dominance_info (CDI_POST_DOMINATORS);
1274 /* Allow statements that can be handled during if-conversion. */
1275 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1276 if (!ifc_bbs)
1278 if (dump_file && (dump_flags & TDF_DETAILS))
1279 fprintf (dump_file, "Irreducible loop\n");
1280 return false;
1283 for (i = 0; i < loop->num_nodes; i++)
1285 basic_block bb = ifc_bbs[i];
1287 if (!if_convertible_bb_p (loop, bb, exit_bb))
1288 return false;
1290 if (bb_with_exit_edge_p (loop, bb))
1291 exit_bb = bb;
1294 for (i = 0; i < loop->num_nodes; i++)
1296 basic_block bb = ifc_bbs[i];
1297 gimple_stmt_iterator gsi;
1299 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1300 switch (gimple_code (gsi_stmt (gsi)))
1302 case GIMPLE_LABEL:
1303 case GIMPLE_ASSIGN:
1304 case GIMPLE_CALL:
1305 case GIMPLE_DEBUG:
1306 case GIMPLE_COND:
1307 break;
1308 default:
1309 return false;
1313 if (flag_tree_loop_if_convert_stores)
1315 data_reference_p dr;
1317 for (i = 0; refs->iterate (i, &dr); i++)
1319 dr->aux = XNEW (struct ifc_dr);
1320 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1321 DR_RW_UNCONDITIONALLY (dr) = -1;
1323 predicate_bbs (loop);
1326 for (i = 0; i < loop->num_nodes; i++)
1328 basic_block bb = ifc_bbs[i];
1329 gimple_stmt_iterator itr;
1331 /* Check the if-convertibility of statements in predicated BBs. */
1332 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1333 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1334 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1335 any_mask_load_store))
1336 return false;
1339 if (flag_tree_loop_if_convert_stores)
1340 for (i = 0; i < loop->num_nodes; i++)
1341 free_bb_predicate (ifc_bbs[i]);
1343 /* Checking PHIs needs to be done after stmts, as the fact whether there
1344 are any masked loads or stores affects the tests. */
1345 for (i = 0; i < loop->num_nodes; i++)
1347 basic_block bb = ifc_bbs[i];
1348 gphi_iterator itr;
1350 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1351 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1352 *any_mask_load_store))
1353 return false;
1356 if (dump_file)
1357 fprintf (dump_file, "Applying if-conversion\n");
1359 return true;
1362 /* Return true when LOOP is if-convertible.
1363 LOOP is if-convertible if:
1364 - it is innermost,
1365 - it has two or more basic blocks,
1366 - it has only one exit,
1367 - loop header is not the exit edge,
1368 - if its basic blocks and phi nodes are if convertible. */
1370 static bool
1371 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1373 edge e;
1374 edge_iterator ei;
1375 bool res = false;
1376 vec<data_reference_p> refs;
1377 vec<ddr_p> ddrs;
1379 /* Handle only innermost loop. */
1380 if (!loop || loop->inner)
1382 if (dump_file && (dump_flags & TDF_DETAILS))
1383 fprintf (dump_file, "not innermost loop\n");
1384 return false;
1387 /* If only one block, no need for if-conversion. */
1388 if (loop->num_nodes <= 2)
1390 if (dump_file && (dump_flags & TDF_DETAILS))
1391 fprintf (dump_file, "less than 2 basic blocks\n");
1392 return false;
1395 /* More than one loop exit is too much to handle. */
1396 if (!single_exit (loop))
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1399 fprintf (dump_file, "multiple exits\n");
1400 return false;
1403 /* If one of the loop header's edge is an exit edge then do not
1404 apply if-conversion. */
1405 FOR_EACH_EDGE (e, ei, loop->header->succs)
1406 if (loop_exit_edge_p (loop, e))
1407 return false;
1409 refs.create (5);
1410 ddrs.create (25);
1411 auto_vec<loop_p, 3> loop_nest;
1412 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1413 any_mask_load_store);
1415 if (flag_tree_loop_if_convert_stores)
1417 data_reference_p dr;
1418 unsigned int i;
1420 for (i = 0; refs.iterate (i, &dr); i++)
1421 free (dr->aux);
1424 free_data_refs (refs);
1425 free_dependence_relations (ddrs);
1426 return res;
1429 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1430 which is in predicated basic block.
1431 In fact, the following PHI pattern is searching:
1432 loop-header:
1433 reduc_1 = PHI <..., reduc_2>
1435 if (...)
1436 reduc_3 = ...
1437 reduc_2 = PHI <reduc_1, reduc_3>
1439 ARG_0 and ARG_1 are correspondent PHI arguments.
1440 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1441 EXTENDED is true if PHI has > 2 arguments. */
1443 static bool
1444 is_cond_scalar_reduction (gimple phi, gimple *reduc, tree arg_0, tree arg_1,
1445 tree *op0, tree *op1, bool extended)
1447 tree lhs, r_op1, r_op2;
1448 gimple stmt;
1449 gimple header_phi = NULL;
1450 enum tree_code reduction_op;
1451 basic_block bb = gimple_bb (phi);
1452 struct loop *loop = bb->loop_father;
1453 edge latch_e = loop_latch_edge (loop);
1454 imm_use_iterator imm_iter;
1455 use_operand_p use_p;
1456 edge e;
1457 edge_iterator ei;
1458 bool result = false;
1459 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1460 return false;
1462 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1464 lhs = arg_1;
1465 header_phi = SSA_NAME_DEF_STMT (arg_0);
1466 stmt = SSA_NAME_DEF_STMT (arg_1);
1468 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1470 lhs = arg_0;
1471 header_phi = SSA_NAME_DEF_STMT (arg_1);
1472 stmt = SSA_NAME_DEF_STMT (arg_0);
1474 else
1475 return false;
1476 if (gimple_bb (header_phi) != loop->header)
1477 return false;
1479 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1480 return false;
1482 if (gimple_code (stmt) != GIMPLE_ASSIGN
1483 || gimple_has_volatile_ops (stmt))
1484 return false;
1486 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1487 return false;
1489 if (!is_predicated (gimple_bb (stmt)))
1490 return false;
1492 /* Check that stmt-block is predecessor of phi-block. */
1493 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1494 if (e->dest == bb)
1496 result = true;
1497 break;
1499 if (!result)
1500 return false;
1502 if (!has_single_use (lhs))
1503 return false;
1505 reduction_op = gimple_assign_rhs_code (stmt);
1506 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1507 return false;
1508 r_op1 = gimple_assign_rhs1 (stmt);
1509 r_op2 = gimple_assign_rhs2 (stmt);
1511 /* Make R_OP1 to hold reduction variable. */
1512 if (r_op2 == PHI_RESULT (header_phi)
1513 && reduction_op == PLUS_EXPR)
1514 std::swap (r_op1, r_op2);
1515 else if (r_op1 != PHI_RESULT (header_phi))
1516 return false;
1518 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1519 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1521 gimple use_stmt = USE_STMT (use_p);
1522 if (is_gimple_debug (use_stmt))
1523 continue;
1524 if (use_stmt == stmt)
1525 continue;
1526 if (gimple_code (use_stmt) != GIMPLE_PHI)
1527 return false;
1530 *op0 = r_op1; *op1 = r_op2;
1531 *reduc = stmt;
1532 return true;
1535 /* Converts conditional scalar reduction into unconditional form, e.g.
1536 bb_4
1537 if (_5 != 0) goto bb_5 else goto bb_6
1538 end_bb_4
1539 bb_5
1540 res_6 = res_13 + 1;
1541 end_bb_5
1542 bb_6
1543 # res_2 = PHI <res_13(4), res_6(5)>
1544 end_bb_6
1546 will be converted into sequence
1547 _ifc__1 = _5 != 0 ? 1 : 0;
1548 res_2 = res_13 + _ifc__1;
1549 Argument SWAP tells that arguments of conditional expression should be
1550 swapped.
1551 Returns rhs of resulting PHI assignment. */
1553 static tree
1554 convert_scalar_cond_reduction (gimple reduc, gimple_stmt_iterator *gsi,
1555 tree cond, tree op0, tree op1, bool swap)
1557 gimple_stmt_iterator stmt_it;
1558 gimple new_assign;
1559 tree rhs;
1560 tree rhs1 = gimple_assign_rhs1 (reduc);
1561 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1562 tree c;
1563 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1565 if (dump_file && (dump_flags & TDF_DETAILS))
1567 fprintf (dump_file, "Found cond scalar reduction.\n");
1568 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1571 /* Build cond expression using COND and constant operand
1572 of reduction rhs. */
1573 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1574 unshare_expr (cond),
1575 swap ? zero : op1,
1576 swap ? op1 : zero);
1578 /* Create assignment stmt and insert it at GSI. */
1579 new_assign = gimple_build_assign (tmp, c);
1580 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1581 /* Build rhs for unconditional increment/decrement. */
1582 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1583 TREE_TYPE (rhs1), op0, tmp);
1585 /* Delete original reduction stmt. */
1586 stmt_it = gsi_for_stmt (reduc);
1587 gsi_remove (&stmt_it, true);
1588 release_defs (reduc);
1589 return rhs;
1592 typedef simple_hashmap_traits <tree_operand_hash> phi_args_hash_traits;
1594 /* Produce condition for all occurrences of ARG in PHI node. */
1596 static tree
1597 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1598 gimple_stmt_iterator *gsi)
1600 int len;
1601 int i;
1602 tree cond = NULL_TREE;
1603 tree c;
1604 edge e;
1606 len = occur->length ();
1607 gcc_assert (len > 0);
1608 for (i = 0; i < len; i++)
1610 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1611 c = bb_predicate (e->src);
1612 if (is_true_predicate (c))
1613 continue;
1614 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1615 is_gimple_condexpr, NULL_TREE,
1616 true, GSI_SAME_STMT);
1617 if (cond != NULL_TREE)
1619 /* Must build OR expression. */
1620 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1621 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1622 is_gimple_condexpr, NULL_TREE,
1623 true, GSI_SAME_STMT);
1625 else
1626 cond = c;
1628 gcc_assert (cond != NULL_TREE);
1629 return cond;
1632 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1633 This routine can handle PHI nodes with more than two arguments.
1635 For example,
1636 S1: A = PHI <x1(1), x2(5)>
1637 is converted into,
1638 S2: A = cond ? x1 : x2;
1640 The generated code is inserted at GSI that points to the top of
1641 basic block's statement list.
1642 If PHI node has more than two arguments a chain of conditional
1643 expression is produced. */
1646 static void
1647 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1649 gimple new_stmt = NULL, reduc;
1650 tree rhs, res, arg0, arg1, op0, op1, scev;
1651 tree cond;
1652 unsigned int index0;
1653 unsigned int max, args_len;
1654 edge e;
1655 basic_block bb;
1656 unsigned int i;
1658 res = gimple_phi_result (phi);
1659 if (virtual_operand_p (res))
1660 return;
1662 if ((rhs = degenerate_phi_result (phi))
1663 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1664 res))
1665 && !chrec_contains_undetermined (scev)
1666 && scev != res
1667 && (rhs = gimple_phi_arg_def (phi, 0))))
1669 if (dump_file && (dump_flags & TDF_DETAILS))
1671 fprintf (dump_file, "Degenerate phi!\n");
1672 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1674 new_stmt = gimple_build_assign (res, rhs);
1675 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1676 update_stmt (new_stmt);
1677 return;
1680 bb = gimple_bb (phi);
1681 if (EDGE_COUNT (bb->preds) == 2)
1683 /* Predicate ordinary PHI node with 2 arguments. */
1684 edge first_edge, second_edge;
1685 basic_block true_bb;
1686 first_edge = EDGE_PRED (bb, 0);
1687 second_edge = EDGE_PRED (bb, 1);
1688 cond = bb_predicate (first_edge->src);
1689 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1690 std::swap (first_edge, second_edge);
1691 if (EDGE_COUNT (first_edge->src->succs) > 1)
1693 cond = bb_predicate (second_edge->src);
1694 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1695 cond = TREE_OPERAND (cond, 0);
1696 else
1697 first_edge = second_edge;
1699 else
1700 cond = bb_predicate (first_edge->src);
1701 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1702 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1703 is_gimple_condexpr, NULL_TREE,
1704 true, GSI_SAME_STMT);
1705 true_bb = first_edge->src;
1706 if (EDGE_PRED (bb, 1)->src == true_bb)
1708 arg0 = gimple_phi_arg_def (phi, 1);
1709 arg1 = gimple_phi_arg_def (phi, 0);
1711 else
1713 arg0 = gimple_phi_arg_def (phi, 0);
1714 arg1 = gimple_phi_arg_def (phi, 1);
1716 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1717 &op0, &op1, false))
1718 /* Convert reduction stmt into vectorizable form. */
1719 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1720 true_bb != gimple_bb (reduc));
1721 else
1722 /* Build new RHS using selected condition and arguments. */
1723 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1724 arg0, arg1);
1725 new_stmt = gimple_build_assign (res, rhs);
1726 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1727 update_stmt (new_stmt);
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1731 fprintf (dump_file, "new phi replacement stmt\n");
1732 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1734 return;
1737 /* Create hashmap for PHI node which contain vector of argument indexes
1738 having the same value. */
1739 bool swap = false;
1740 hash_map<tree, auto_vec<int>, phi_args_hash_traits> phi_arg_map;
1741 unsigned int num_args = gimple_phi_num_args (phi);
1742 int max_ind = -1;
1743 /* Vector of different PHI argument values. */
1744 auto_vec<tree> args (num_args);
1746 /* Compute phi_arg_map. */
1747 for (i = 0; i < num_args; i++)
1749 tree arg;
1751 arg = gimple_phi_arg_def (phi, i);
1752 if (!phi_arg_map.get (arg))
1753 args.quick_push (arg);
1754 phi_arg_map.get_or_insert (arg).safe_push (i);
1757 /* Determine element with max number of occurrences. */
1758 max_ind = -1;
1759 max = 1;
1760 args_len = args.length ();
1761 for (i = 0; i < args_len; i++)
1763 unsigned int len;
1764 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1766 max_ind = (int) i;
1767 max = len;
1771 /* Put element with max number of occurences to the end of ARGS. */
1772 if (max_ind != -1 && max_ind +1 != (int) args_len)
1773 std::swap (args[args_len - 1], args[max_ind]);
1775 /* Handle one special case when number of arguments with different values
1776 is equal 2 and one argument has the only occurrence. Such PHI can be
1777 handled as if would have only 2 arguments. */
1778 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1780 vec<int> *indexes;
1781 indexes = phi_arg_map.get (args[0]);
1782 index0 = (*indexes)[0];
1783 arg0 = args[0];
1784 arg1 = args[1];
1785 e = gimple_phi_arg_edge (phi, index0);
1786 cond = bb_predicate (e->src);
1787 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1789 swap = true;
1790 cond = TREE_OPERAND (cond, 0);
1792 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1793 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1794 is_gimple_condexpr, NULL_TREE,
1795 true, GSI_SAME_STMT);
1796 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1797 &op0, &op1, true)))
1798 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1799 swap? arg1 : arg0,
1800 swap? arg0 : arg1);
1801 else
1802 /* Convert reduction stmt into vectorizable form. */
1803 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1804 swap);
1805 new_stmt = gimple_build_assign (res, rhs);
1806 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1807 update_stmt (new_stmt);
1809 else
1811 /* Common case. */
1812 vec<int> *indexes;
1813 tree type = TREE_TYPE (gimple_phi_result (phi));
1814 tree lhs;
1815 arg1 = args[1];
1816 for (i = 0; i < args_len; i++)
1818 arg0 = args[i];
1819 indexes = phi_arg_map.get (args[i]);
1820 if (i != args_len - 1)
1821 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1822 else
1823 lhs = res;
1824 cond = gen_phi_arg_condition (phi, indexes, gsi);
1825 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1826 arg0, arg1);
1827 new_stmt = gimple_build_assign (lhs, rhs);
1828 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1829 update_stmt (new_stmt);
1830 arg1 = lhs;
1834 if (dump_file && (dump_flags & TDF_DETAILS))
1836 fprintf (dump_file, "new extended phi replacement stmt\n");
1837 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1841 /* Replaces in LOOP all the scalar phi nodes other than those in the
1842 LOOP->header block with conditional modify expressions. */
1844 static void
1845 predicate_all_scalar_phis (struct loop *loop)
1847 basic_block bb;
1848 unsigned int orig_loop_num_nodes = loop->num_nodes;
1849 unsigned int i;
1851 for (i = 1; i < orig_loop_num_nodes; i++)
1853 gphi *phi;
1854 gimple_stmt_iterator gsi;
1855 gphi_iterator phi_gsi;
1856 bb = ifc_bbs[i];
1858 if (bb == loop->header)
1859 continue;
1861 if (EDGE_COUNT (bb->preds) == 1)
1862 continue;
1864 phi_gsi = gsi_start_phis (bb);
1865 if (gsi_end_p (phi_gsi))
1866 continue;
1868 gsi = gsi_after_labels (bb);
1869 while (!gsi_end_p (phi_gsi))
1871 phi = phi_gsi.phi ();
1872 predicate_scalar_phi (phi, &gsi);
1873 release_phi_node (phi);
1874 gsi_next (&phi_gsi);
1877 set_phi_nodes (bb, NULL);
1881 /* Insert in each basic block of LOOP the statements produced by the
1882 gimplification of the predicates. */
1884 static void
1885 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1887 unsigned int i;
1889 for (i = 0; i < loop->num_nodes; i++)
1891 basic_block bb = ifc_bbs[i];
1892 gimple_seq stmts;
1893 if (!is_predicated (bb))
1894 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
1895 if (!is_predicated (bb))
1897 /* Do not insert statements for a basic block that is not
1898 predicated. Also make sure that the predicate of the
1899 basic block is set to true. */
1900 reset_bb_predicate (bb);
1901 continue;
1904 stmts = bb_predicate_gimplified_stmts (bb);
1905 if (stmts)
1907 if (flag_tree_loop_if_convert_stores
1908 || any_mask_load_store)
1910 /* Insert the predicate of the BB just after the label,
1911 as the if-conversion of memory writes will use this
1912 predicate. */
1913 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1914 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1916 else
1918 /* Insert the predicate of the BB at the end of the BB
1919 as this would reduce the register pressure: the only
1920 use of this predicate will be in successor BBs. */
1921 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1923 if (gsi_end_p (gsi)
1924 || stmt_ends_bb_p (gsi_stmt (gsi)))
1925 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1926 else
1927 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1930 /* Once the sequence is code generated, set it to NULL. */
1931 set_bb_predicate_gimplified_stmts (bb, NULL);
1936 /* Helper function for predicate_mem_writes. Returns index of existent
1937 mask if it was created for given SIZE and -1 otherwise. */
1939 static int
1940 mask_exists (int size, vec<int> vec)
1942 unsigned int ix;
1943 int v;
1944 FOR_EACH_VEC_ELT (vec, ix, v)
1945 if (v == size)
1946 return (int) ix;
1947 return -1;
1950 /* Predicate each write to memory in LOOP.
1952 This function transforms control flow constructs containing memory
1953 writes of the form:
1955 | for (i = 0; i < N; i++)
1956 | if (cond)
1957 | A[i] = expr;
1959 into the following form that does not contain control flow:
1961 | for (i = 0; i < N; i++)
1962 | A[i] = cond ? expr : A[i];
1964 The original CFG looks like this:
1966 | bb_0
1967 | i = 0
1968 | end_bb_0
1970 | bb_1
1971 | if (i < N) goto bb_5 else goto bb_2
1972 | end_bb_1
1974 | bb_2
1975 | cond = some_computation;
1976 | if (cond) goto bb_3 else goto bb_4
1977 | end_bb_2
1979 | bb_3
1980 | A[i] = expr;
1981 | goto bb_4
1982 | end_bb_3
1984 | bb_4
1985 | goto bb_1
1986 | end_bb_4
1988 insert_gimplified_predicates inserts the computation of the COND
1989 expression at the beginning of the destination basic block:
1991 | bb_0
1992 | i = 0
1993 | end_bb_0
1995 | bb_1
1996 | if (i < N) goto bb_5 else goto bb_2
1997 | end_bb_1
1999 | bb_2
2000 | cond = some_computation;
2001 | if (cond) goto bb_3 else goto bb_4
2002 | end_bb_2
2004 | bb_3
2005 | cond = some_computation;
2006 | A[i] = expr;
2007 | goto bb_4
2008 | end_bb_3
2010 | bb_4
2011 | goto bb_1
2012 | end_bb_4
2014 predicate_mem_writes is then predicating the memory write as follows:
2016 | bb_0
2017 | i = 0
2018 | end_bb_0
2020 | bb_1
2021 | if (i < N) goto bb_5 else goto bb_2
2022 | end_bb_1
2024 | bb_2
2025 | if (cond) goto bb_3 else goto bb_4
2026 | end_bb_2
2028 | bb_3
2029 | cond = some_computation;
2030 | A[i] = cond ? expr : A[i];
2031 | goto bb_4
2032 | end_bb_3
2034 | bb_4
2035 | goto bb_1
2036 | end_bb_4
2038 and finally combine_blocks removes the basic block boundaries making
2039 the loop vectorizable:
2041 | bb_0
2042 | i = 0
2043 | if (i < N) goto bb_5 else goto bb_1
2044 | end_bb_0
2046 | bb_1
2047 | cond = some_computation;
2048 | A[i] = cond ? expr : A[i];
2049 | if (i < N) goto bb_5 else goto bb_4
2050 | end_bb_1
2052 | bb_4
2053 | goto bb_1
2054 | end_bb_4
2057 static void
2058 predicate_mem_writes (loop_p loop)
2060 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2061 auto_vec<int, 1> vect_sizes;
2062 auto_vec<tree, 1> vect_masks;
2064 for (i = 1; i < orig_loop_num_nodes; i++)
2066 gimple_stmt_iterator gsi;
2067 basic_block bb = ifc_bbs[i];
2068 tree cond = bb_predicate (bb);
2069 bool swap;
2070 gimple stmt;
2071 int index;
2073 if (is_true_predicate (cond))
2074 continue;
2076 swap = false;
2077 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2079 swap = true;
2080 cond = TREE_OPERAND (cond, 0);
2083 vect_sizes.truncate (0);
2084 vect_masks.truncate (0);
2086 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2087 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2088 continue;
2089 else if (gimple_plf (stmt, GF_PLF_2))
2091 tree lhs = gimple_assign_lhs (stmt);
2092 tree rhs = gimple_assign_rhs1 (stmt);
2093 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
2094 gimple new_stmt;
2095 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
2096 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2097 mark_addressable (ref);
2098 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2099 true, NULL_TREE, true,
2100 GSI_SAME_STMT);
2101 if (!vect_sizes.is_empty ()
2102 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2103 /* Use created mask. */
2104 mask = vect_masks[index];
2105 else
2107 masktype = build_nonstandard_integer_type (bitsize, 1);
2108 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
2109 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
2110 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2111 is_gimple_condexpr,
2112 NULL_TREE,
2113 true, GSI_SAME_STMT);
2114 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
2115 mask_op0, mask_op1);
2116 mask = ifc_temp_var (masktype, mask, &gsi);
2117 /* Save mask and its size for further use. */
2118 vect_sizes.safe_push (bitsize);
2119 vect_masks.safe_push (mask);
2121 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
2122 /* Copy points-to info if possible. */
2123 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2124 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2125 ref);
2126 if (TREE_CODE (lhs) == SSA_NAME)
2128 new_stmt
2129 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2130 ptr, mask);
2131 gimple_call_set_lhs (new_stmt, lhs);
2133 else
2134 new_stmt
2135 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2136 mask, rhs);
2137 gsi_replace (&gsi, new_stmt, true);
2139 else if (gimple_vdef (stmt))
2141 tree lhs = gimple_assign_lhs (stmt);
2142 tree rhs = gimple_assign_rhs1 (stmt);
2143 tree type = TREE_TYPE (lhs);
2145 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2146 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2147 if (swap)
2148 std::swap (lhs, rhs);
2149 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2150 is_gimple_condexpr, NULL_TREE,
2151 true, GSI_SAME_STMT);
2152 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2153 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2154 update_stmt (stmt);
2159 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2160 other than the exit and latch of the LOOP. Also resets the
2161 GIMPLE_DEBUG information. */
2163 static void
2164 remove_conditions_and_labels (loop_p loop)
2166 gimple_stmt_iterator gsi;
2167 unsigned int i;
2169 for (i = 0; i < loop->num_nodes; i++)
2171 basic_block bb = ifc_bbs[i];
2173 if (bb_with_exit_edge_p (loop, bb)
2174 || bb == loop->latch)
2175 continue;
2177 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2178 switch (gimple_code (gsi_stmt (gsi)))
2180 case GIMPLE_COND:
2181 case GIMPLE_LABEL:
2182 gsi_remove (&gsi, true);
2183 break;
2185 case GIMPLE_DEBUG:
2186 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2187 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2189 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2190 update_stmt (gsi_stmt (gsi));
2192 gsi_next (&gsi);
2193 break;
2195 default:
2196 gsi_next (&gsi);
2201 /* Combine all the basic blocks from LOOP into one or two super basic
2202 blocks. Replace PHI nodes with conditional modify expressions. */
2204 static void
2205 combine_blocks (struct loop *loop, bool any_mask_load_store)
2207 basic_block bb, exit_bb, merge_target_bb;
2208 unsigned int orig_loop_num_nodes = loop->num_nodes;
2209 unsigned int i;
2210 edge e;
2211 edge_iterator ei;
2213 predicate_bbs (loop);
2214 remove_conditions_and_labels (loop);
2215 insert_gimplified_predicates (loop, any_mask_load_store);
2216 predicate_all_scalar_phis (loop);
2218 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2219 predicate_mem_writes (loop);
2221 /* Merge basic blocks: first remove all the edges in the loop,
2222 except for those from the exit block. */
2223 exit_bb = NULL;
2224 for (i = 0; i < orig_loop_num_nodes; i++)
2226 bb = ifc_bbs[i];
2227 free_bb_predicate (bb);
2228 if (bb_with_exit_edge_p (loop, bb))
2230 gcc_assert (exit_bb == NULL);
2231 exit_bb = bb;
2234 gcc_assert (exit_bb != loop->latch);
2236 for (i = 1; i < orig_loop_num_nodes; i++)
2238 bb = ifc_bbs[i];
2240 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2242 if (e->src == exit_bb)
2243 ei_next (&ei);
2244 else
2245 remove_edge (e);
2249 if (exit_bb != NULL)
2251 if (exit_bb != loop->header)
2253 /* Connect this node to loop header. */
2254 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2255 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2258 /* Redirect non-exit edges to loop->latch. */
2259 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2261 if (!loop_exit_edge_p (loop, e))
2262 redirect_edge_and_branch (e, loop->latch);
2264 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2266 else
2268 /* If the loop does not have an exit, reconnect header and latch. */
2269 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2270 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2273 merge_target_bb = loop->header;
2274 for (i = 1; i < orig_loop_num_nodes; i++)
2276 gimple_stmt_iterator gsi;
2277 gimple_stmt_iterator last;
2279 bb = ifc_bbs[i];
2281 if (bb == exit_bb || bb == loop->latch)
2282 continue;
2284 /* Make stmts member of loop->header. */
2285 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2286 gimple_set_bb (gsi_stmt (gsi), merge_target_bb);
2288 /* Update stmt list. */
2289 last = gsi_last_bb (merge_target_bb);
2290 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2291 set_bb_seq (bb, NULL);
2293 delete_basic_block (bb);
2296 /* If possible, merge loop header to the block with the exit edge.
2297 This reduces the number of basic blocks to two, to please the
2298 vectorizer that handles only loops with two nodes. */
2299 if (exit_bb
2300 && exit_bb != loop->header
2301 && can_merge_blocks_p (loop->header, exit_bb))
2302 merge_blocks (loop->header, exit_bb);
2304 free (ifc_bbs);
2305 ifc_bbs = NULL;
2308 /* Version LOOP before if-converting it, the original loop
2309 will be then if-converted, the new copy of the loop will not,
2310 and the LOOP_VECTORIZED internal call will be guarding which
2311 loop to execute. The vectorizer pass will fold this
2312 internal call into either true or false. */
2314 static bool
2315 version_loop_for_if_conversion (struct loop *loop)
2317 basic_block cond_bb;
2318 tree cond = make_ssa_name (boolean_type_node);
2319 struct loop *new_loop;
2320 gimple g;
2321 gimple_stmt_iterator gsi;
2323 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2324 build_int_cst (integer_type_node, loop->num),
2325 integer_zero_node);
2326 gimple_call_set_lhs (g, cond);
2328 initialize_original_copy_tables ();
2329 new_loop = loop_version (loop, cond, &cond_bb,
2330 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2331 REG_BR_PROB_BASE, true);
2332 free_original_copy_tables ();
2333 if (new_loop == NULL)
2334 return false;
2335 new_loop->dont_vectorize = true;
2336 new_loop->force_vectorize = false;
2337 gsi = gsi_last_bb (cond_bb);
2338 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2339 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2340 update_ssa (TODO_update_ssa);
2341 return true;
2344 /* Performs splitting of critical edges if aggressive_if_conv is true.
2345 Returns false if loop won't be if converted and true otherwise. */
2347 static bool
2348 ifcvt_split_critical_edges (struct loop *loop)
2350 basic_block *body;
2351 basic_block bb;
2352 unsigned int num = loop->num_nodes;
2353 unsigned int i;
2354 gimple stmt;
2355 edge e;
2356 edge_iterator ei;
2358 if (num <= 2)
2359 return false;
2360 if (loop->inner)
2361 return false;
2362 if (!single_exit (loop))
2363 return false;
2365 body = get_loop_body (loop);
2366 for (i = 0; i < num; i++)
2368 bb = body[i];
2369 if (bb == loop->latch
2370 || bb_with_exit_edge_p (loop, bb))
2371 continue;
2372 stmt = last_stmt (bb);
2373 /* Skip basic blocks not ending with conditional branch. */
2374 if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
2375 continue;
2376 FOR_EACH_EDGE (e, ei, bb->succs)
2377 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2378 split_edge (e);
2380 free (body);
2381 return true;
2384 /* Assumes that lhs of DEF_STMT have multiple uses.
2385 Delete one use by (1) creation of copy DEF_STMT with
2386 unique lhs; (2) change original use of lhs in one
2387 use statement with newly created lhs. */
2389 static void
2390 ifcvt_split_def_stmt (gimple def_stmt, gimple use_stmt)
2392 tree var;
2393 tree lhs;
2394 gimple copy_stmt;
2395 gimple_stmt_iterator gsi;
2396 use_operand_p use_p;
2397 imm_use_iterator imm_iter;
2399 var = gimple_assign_lhs (def_stmt);
2400 copy_stmt = gimple_copy (def_stmt);
2401 lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
2402 gimple_assign_set_lhs (copy_stmt, lhs);
2403 SSA_NAME_DEF_STMT (lhs) = copy_stmt;
2404 /* Insert copy of DEF_STMT. */
2405 gsi = gsi_for_stmt (def_stmt);
2406 gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
2407 /* Change use of var to lhs in use_stmt. */
2408 if (dump_file && (dump_flags & TDF_DETAILS))
2410 fprintf (dump_file, "Change use of var ");
2411 print_generic_expr (dump_file, var, TDF_SLIM);
2412 fprintf (dump_file, " to ");
2413 print_generic_expr (dump_file, lhs, TDF_SLIM);
2414 fprintf (dump_file, "\n");
2416 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2418 if (USE_STMT (use_p) != use_stmt)
2419 continue;
2420 SET_USE (use_p, lhs);
2421 break;
2425 /* Traverse bool pattern recursively starting from VAR.
2426 Save its def and use statements to defuse_list if VAR does
2427 not have single use. */
2429 static void
2430 ifcvt_walk_pattern_tree (tree var, vec<gimple> *defuse_list,
2431 gimple use_stmt)
2433 tree rhs1, rhs2;
2434 enum tree_code code;
2435 gimple def_stmt;
2437 def_stmt = SSA_NAME_DEF_STMT (var);
2438 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2439 return;
2440 if (!has_single_use (var))
2442 /* Put def and use stmts into defuse_list. */
2443 defuse_list->safe_push (def_stmt);
2444 defuse_list->safe_push (use_stmt);
2445 if (dump_file && (dump_flags & TDF_DETAILS))
2447 fprintf (dump_file, "Multiple lhs uses in stmt\n");
2448 print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
2451 rhs1 = gimple_assign_rhs1 (def_stmt);
2452 code = gimple_assign_rhs_code (def_stmt);
2453 switch (code)
2455 case SSA_NAME:
2456 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2457 break;
2458 CASE_CONVERT:
2459 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2460 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2461 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2462 break;
2463 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2464 break;
2465 case BIT_NOT_EXPR:
2466 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2467 break;
2468 case BIT_AND_EXPR:
2469 case BIT_IOR_EXPR:
2470 case BIT_XOR_EXPR:
2471 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2472 rhs2 = gimple_assign_rhs2 (def_stmt);
2473 ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
2474 break;
2475 default:
2476 break;
2478 return;
2481 /* Returns true if STMT can be a root of bool pattern apllied
2482 by vectorizer. */
2484 static bool
2485 stmt_is_root_of_bool_pattern (gimple stmt)
2487 enum tree_code code;
2488 tree lhs, rhs;
2490 code = gimple_assign_rhs_code (stmt);
2491 if (CONVERT_EXPR_CODE_P (code))
2493 lhs = gimple_assign_lhs (stmt);
2494 rhs = gimple_assign_rhs1 (stmt);
2495 if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2496 return false;
2497 if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
2498 return false;
2499 return true;
2501 else if (code == COND_EXPR)
2503 rhs = gimple_assign_rhs1 (stmt);
2504 if (TREE_CODE (rhs) != SSA_NAME)
2505 return false;
2506 return true;
2508 return false;
2511 /* Traverse all statements in BB which correspondent to loop header to
2512 find out all statements which can start bool pattern applied by
2513 vectorizer and convert multiple uses in it to conform pattern
2514 restrictions. Such case can occur if the same predicate is used both
2515 for phi node conversion and load/store mask. */
2517 static void
2518 ifcvt_repair_bool_pattern (basic_block bb)
2520 tree rhs;
2521 gimple stmt;
2522 gimple_stmt_iterator gsi;
2523 vec<gimple> defuse_list = vNULL;
2524 vec<gimple> pattern_roots = vNULL;
2525 bool repeat = true;
2526 int niter = 0;
2527 unsigned int ix;
2529 /* Collect all root pattern statements. */
2530 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2532 stmt = gsi_stmt (gsi);
2533 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2534 continue;
2535 if (!stmt_is_root_of_bool_pattern (stmt))
2536 continue;
2537 pattern_roots.safe_push (stmt);
2540 if (pattern_roots.is_empty ())
2541 return;
2543 /* Split all statements with multiple uses iteratively since splitting
2544 may create new multiple uses. */
2545 while (repeat)
2547 repeat = false;
2548 niter++;
2549 FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
2551 rhs = gimple_assign_rhs1 (stmt);
2552 ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
2553 while (defuse_list.length () > 0)
2555 repeat = true;
2556 gimple def_stmt, use_stmt;
2557 use_stmt = defuse_list.pop ();
2558 def_stmt = defuse_list.pop ();
2559 ifcvt_split_def_stmt (def_stmt, use_stmt);
2564 if (dump_file && (dump_flags & TDF_DETAILS))
2565 fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
2566 niter);
2569 /* Delete redundant statements produced by predication which prevents
2570 loop vectorization. */
2572 static void
2573 ifcvt_local_dce (basic_block bb)
2575 gimple stmt;
2576 gimple stmt1;
2577 gimple phi;
2578 gimple_stmt_iterator gsi;
2579 vec<gimple> worklist;
2580 enum gimple_code code;
2581 use_operand_p use_p;
2582 imm_use_iterator imm_iter;
2584 worklist.create (64);
2585 /* Consider all phi as live statements. */
2586 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2588 phi = gsi_stmt (gsi);
2589 gimple_set_plf (phi, GF_PLF_2, true);
2590 worklist.safe_push (phi);
2592 /* Consider load/store statemnts, CALL and COND as live. */
2593 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2595 stmt = gsi_stmt (gsi);
2596 if (gimple_store_p (stmt)
2597 || gimple_assign_load_p (stmt)
2598 || is_gimple_debug (stmt))
2600 gimple_set_plf (stmt, GF_PLF_2, true);
2601 worklist.safe_push (stmt);
2602 continue;
2604 code = gimple_code (stmt);
2605 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2607 gimple_set_plf (stmt, GF_PLF_2, true);
2608 worklist.safe_push (stmt);
2609 continue;
2611 gimple_set_plf (stmt, GF_PLF_2, false);
2613 if (code == GIMPLE_ASSIGN)
2615 tree lhs = gimple_assign_lhs (stmt);
2616 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2618 stmt1 = USE_STMT (use_p);
2619 if (gimple_bb (stmt1) != bb)
2621 gimple_set_plf (stmt, GF_PLF_2, true);
2622 worklist.safe_push (stmt);
2623 break;
2628 /* Propagate liveness through arguments of live stmt. */
2629 while (worklist.length () > 0)
2631 ssa_op_iter iter;
2632 use_operand_p use_p;
2633 tree use;
2635 stmt = worklist.pop ();
2636 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2638 use = USE_FROM_PTR (use_p);
2639 if (TREE_CODE (use) != SSA_NAME)
2640 continue;
2641 stmt1 = SSA_NAME_DEF_STMT (use);
2642 if (gimple_bb (stmt1) != bb
2643 || gimple_plf (stmt1, GF_PLF_2))
2644 continue;
2645 gimple_set_plf (stmt1, GF_PLF_2, true);
2646 worklist.safe_push (stmt1);
2649 /* Delete dead statements. */
2650 gsi = gsi_start_bb (bb);
2651 while (!gsi_end_p (gsi))
2653 stmt = gsi_stmt (gsi);
2654 if (gimple_plf (stmt, GF_PLF_2))
2656 gsi_next (&gsi);
2657 continue;
2659 if (dump_file && (dump_flags & TDF_DETAILS))
2661 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2662 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2664 gsi_remove (&gsi, true);
2665 release_defs (stmt);
2669 /* If-convert LOOP when it is legal. For the moment this pass has no
2670 profitability analysis. Returns non-zero todo flags when something
2671 changed. */
2673 static unsigned int
2674 tree_if_conversion (struct loop *loop)
2676 unsigned int todo = 0;
2677 ifc_bbs = NULL;
2678 bool any_mask_load_store = false;
2680 /* Set-up aggressive if-conversion for loops marked with simd pragma. */
2681 aggressive_if_conv = loop->force_vectorize;
2682 /* Check either outer loop was marked with simd pragma. */
2683 if (!aggressive_if_conv)
2685 struct loop *outer_loop = loop_outer (loop);
2686 if (outer_loop && outer_loop->force_vectorize)
2687 aggressive_if_conv = true;
2690 if (aggressive_if_conv)
2691 if (!ifcvt_split_critical_edges (loop))
2692 goto cleanup;
2694 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2695 || !dbg_cnt (if_conversion_tree))
2696 goto cleanup;
2698 if (any_mask_load_store
2699 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2700 || loop->dont_vectorize))
2701 goto cleanup;
2703 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2704 goto cleanup;
2706 /* Now all statements are if-convertible. Combine all the basic
2707 blocks into one huge basic block doing the if-conversion
2708 on-the-fly. */
2709 combine_blocks (loop, any_mask_load_store);
2711 /* Delete dead predicate computations and repair tree correspondent
2712 to bool pattern to delete multiple uses of preidcates. */
2713 if (aggressive_if_conv)
2715 ifcvt_local_dce (loop->header);
2716 ifcvt_repair_bool_pattern (loop->header);
2719 todo |= TODO_cleanup_cfg;
2720 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2722 mark_virtual_operands_for_renaming (cfun);
2723 todo |= TODO_update_ssa_only_virtuals;
2726 cleanup:
2727 if (ifc_bbs)
2729 unsigned int i;
2731 for (i = 0; i < loop->num_nodes; i++)
2732 free_bb_predicate (ifc_bbs[i]);
2734 free (ifc_bbs);
2735 ifc_bbs = NULL;
2737 free_dominance_info (CDI_POST_DOMINATORS);
2739 return todo;
2742 /* Tree if-conversion pass management. */
2744 namespace {
2746 const pass_data pass_data_if_conversion =
2748 GIMPLE_PASS, /* type */
2749 "ifcvt", /* name */
2750 OPTGROUP_NONE, /* optinfo_flags */
2751 TV_NONE, /* tv_id */
2752 ( PROP_cfg | PROP_ssa ), /* properties_required */
2753 0, /* properties_provided */
2754 0, /* properties_destroyed */
2755 0, /* todo_flags_start */
2756 0, /* todo_flags_finish */
2759 class pass_if_conversion : public gimple_opt_pass
2761 public:
2762 pass_if_conversion (gcc::context *ctxt)
2763 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2766 /* opt_pass methods: */
2767 virtual bool gate (function *);
2768 virtual unsigned int execute (function *);
2770 }; // class pass_if_conversion
2772 bool
2773 pass_if_conversion::gate (function *fun)
2775 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2776 && flag_tree_loop_if_convert != 0)
2777 || flag_tree_loop_if_convert == 1
2778 || flag_tree_loop_if_convert_stores == 1);
2781 unsigned int
2782 pass_if_conversion::execute (function *fun)
2784 struct loop *loop;
2785 unsigned todo = 0;
2787 if (number_of_loops (fun) <= 1)
2788 return 0;
2790 FOR_EACH_LOOP (loop, 0)
2791 if (flag_tree_loop_if_convert == 1
2792 || flag_tree_loop_if_convert_stores == 1
2793 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2794 && !loop->dont_vectorize))
2795 todo |= tree_if_conversion (loop);
2797 #ifdef ENABLE_CHECKING
2799 basic_block bb;
2800 FOR_EACH_BB_FN (bb, fun)
2801 gcc_assert (!bb->aux);
2803 #endif
2805 return todo;
2808 } // anon namespace
2810 gimple_opt_pass *
2811 make_pass_if_conversion (gcc::context *ctxt)
2813 return new pass_if_conversion (ctxt);