svn merge -r215707:216846 svn+ssh://gcc.gnu.org/svn/gcc/trunk
[official-gcc.git] / gcc / tree-if-conv.c
blob595c05b46d79559c03954796e090460cd6437f41
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "tm.h"
87 #include "tree.h"
88 #include "stor-layout.h"
89 #include "flags.h"
90 #include "predict.h"
91 #include "vec.h"
92 #include "hashtab.h"
93 #include "hash-set.h"
94 #include "machmode.h"
95 #include "hard-reg-set.h"
96 #include "input.h"
97 #include "function.h"
98 #include "dominance.h"
99 #include "cfg.h"
100 #include "basic-block.h"
101 #include "gimple-pretty-print.h"
102 #include "tree-ssa-alias.h"
103 #include "internal-fn.h"
104 #include "gimple-fold.h"
105 #include "gimple-expr.h"
106 #include "is-a.h"
107 #include "gimple.h"
108 #include "gimplify.h"
109 #include "gimple-iterator.h"
110 #include "gimplify-me.h"
111 #include "gimple-ssa.h"
112 #include "tree-cfg.h"
113 #include "tree-phinodes.h"
114 #include "ssa-iterators.h"
115 #include "stringpool.h"
116 #include "tree-ssanames.h"
117 #include "tree-into-ssa.h"
118 #include "tree-ssa.h"
119 #include "cfgloop.h"
120 #include "tree-chrec.h"
121 #include "tree-data-ref.h"
122 #include "tree-scalar-evolution.h"
123 #include "tree-ssa-loop-ivopts.h"
124 #include "tree-ssa-address.h"
125 #include "tree-pass.h"
126 #include "dbgcnt.h"
127 #include "expr.h"
128 #include "optabs.h"
130 /* List of basic blocks in if-conversion-suitable order. */
131 static basic_block *ifc_bbs;
133 /* Structure used to predicate basic blocks. This is attached to the
134 ->aux field of the BBs in the loop to be if-converted. */
135 typedef struct bb_predicate_s {
137 /* The condition under which this basic block is executed. */
138 tree predicate;
140 /* PREDICATE is gimplified, and the sequence of statements is
141 recorded here, in order to avoid the duplication of computations
142 that occur in previous conditions. See PR44483. */
143 gimple_seq predicate_gimplified_stmts;
144 } *bb_predicate_p;
146 /* Returns true when the basic block BB has a predicate. */
148 static inline bool
149 bb_has_predicate (basic_block bb)
151 return bb->aux != NULL;
154 /* Returns the gimplified predicate for basic block BB. */
156 static inline tree
157 bb_predicate (basic_block bb)
159 return ((bb_predicate_p) bb->aux)->predicate;
162 /* Sets the gimplified predicate COND for basic block BB. */
164 static inline void
165 set_bb_predicate (basic_block bb, tree cond)
167 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
168 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
169 || is_gimple_condexpr (cond));
170 ((bb_predicate_p) bb->aux)->predicate = cond;
173 /* Returns the sequence of statements of the gimplification of the
174 predicate for basic block BB. */
176 static inline gimple_seq
177 bb_predicate_gimplified_stmts (basic_block bb)
179 return ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts;
182 /* Sets the sequence of statements STMTS of the gimplification of the
183 predicate for basic block BB. */
185 static inline void
186 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
188 ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts = stmts;
191 /* Adds the sequence of statements STMTS to the sequence of statements
192 of the predicate for basic block BB. */
194 static inline void
195 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
197 gimple_seq_add_seq
198 (&(((bb_predicate_p) bb->aux)->predicate_gimplified_stmts), stmts);
201 /* Initializes to TRUE the predicate of basic block BB. */
203 static inline void
204 init_bb_predicate (basic_block bb)
206 bb->aux = XNEW (struct bb_predicate_s);
207 set_bb_predicate_gimplified_stmts (bb, NULL);
208 set_bb_predicate (bb, boolean_true_node);
211 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
212 but don't actually free it. */
214 static inline void
215 release_bb_predicate (basic_block bb)
217 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
218 if (stmts)
220 gimple_stmt_iterator i;
222 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
223 free_stmt_operands (cfun, gsi_stmt (i));
224 set_bb_predicate_gimplified_stmts (bb, NULL);
228 /* Free the predicate of basic block BB. */
230 static inline void
231 free_bb_predicate (basic_block bb)
233 if (!bb_has_predicate (bb))
234 return;
236 release_bb_predicate (bb);
237 free (bb->aux);
238 bb->aux = NULL;
241 /* Reinitialize predicate of BB with the true predicate. */
243 static inline void
244 reset_bb_predicate (basic_block bb)
246 if (!bb_has_predicate (bb))
247 init_bb_predicate (bb);
248 else
250 release_bb_predicate (bb);
251 set_bb_predicate (bb, boolean_true_node);
255 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
256 the expression EXPR. Inserts the statement created for this
257 computation before GSI and leaves the iterator GSI at the same
258 statement. */
260 static tree
261 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
263 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
264 gimple stmt = gimple_build_assign (new_name, expr);
265 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
266 return new_name;
269 /* Return true when COND is a true predicate. */
271 static inline bool
272 is_true_predicate (tree cond)
274 return (cond == NULL_TREE
275 || cond == boolean_true_node
276 || integer_onep (cond));
279 /* Returns true when BB has a predicate that is not trivial: true or
280 NULL_TREE. */
282 static inline bool
283 is_predicated (basic_block bb)
285 return !is_true_predicate (bb_predicate (bb));
288 /* Parses the predicate COND and returns its comparison code and
289 operands OP0 and OP1. */
291 static enum tree_code
292 parse_predicate (tree cond, tree *op0, tree *op1)
294 gimple s;
296 if (TREE_CODE (cond) == SSA_NAME
297 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
299 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
301 *op0 = gimple_assign_rhs1 (s);
302 *op1 = gimple_assign_rhs2 (s);
303 return gimple_assign_rhs_code (s);
306 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
308 tree op = gimple_assign_rhs1 (s);
309 tree type = TREE_TYPE (op);
310 enum tree_code code = parse_predicate (op, op0, op1);
312 return code == ERROR_MARK ? ERROR_MARK
313 : invert_tree_comparison (code, HONOR_NANS (TYPE_MODE (type)));
316 return ERROR_MARK;
319 if (TREE_CODE_CLASS (TREE_CODE (cond)) == tcc_comparison)
321 *op0 = TREE_OPERAND (cond, 0);
322 *op1 = TREE_OPERAND (cond, 1);
323 return TREE_CODE (cond);
326 return ERROR_MARK;
329 /* Returns the fold of predicate C1 OR C2 at location LOC. */
331 static tree
332 fold_or_predicates (location_t loc, tree c1, tree c2)
334 tree op1a, op1b, op2a, op2b;
335 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
336 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
338 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
340 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
341 code2, op2a, op2b);
342 if (t)
343 return t;
346 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
349 /* Returns true if N is either a constant or a SSA_NAME. */
351 static bool
352 constant_or_ssa_name (tree n)
354 switch (TREE_CODE (n))
356 case SSA_NAME:
357 case INTEGER_CST:
358 case REAL_CST:
359 case COMPLEX_CST:
360 case VECTOR_CST:
361 return true;
362 default:
363 return false;
367 /* Returns either a COND_EXPR or the folded expression if the folded
368 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
369 a constant or a SSA_NAME. */
371 static tree
372 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
374 tree rhs1, lhs1, cond_expr;
375 cond_expr = fold_ternary (COND_EXPR, type, cond,
376 rhs, lhs);
378 if (cond_expr == NULL_TREE)
379 return build3 (COND_EXPR, type, cond, rhs, lhs);
381 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
383 if (constant_or_ssa_name (cond_expr))
384 return cond_expr;
386 if (TREE_CODE (cond_expr) == ABS_EXPR)
388 rhs1 = TREE_OPERAND (cond_expr, 1);
389 STRIP_USELESS_TYPE_CONVERSION (rhs1);
390 if (constant_or_ssa_name (rhs1))
391 return build1 (ABS_EXPR, type, rhs1);
394 if (TREE_CODE (cond_expr) == MIN_EXPR
395 || TREE_CODE (cond_expr) == MAX_EXPR)
397 lhs1 = TREE_OPERAND (cond_expr, 0);
398 STRIP_USELESS_TYPE_CONVERSION (lhs1);
399 rhs1 = TREE_OPERAND (cond_expr, 1);
400 STRIP_USELESS_TYPE_CONVERSION (rhs1);
401 if (constant_or_ssa_name (rhs1)
402 && constant_or_ssa_name (lhs1))
403 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
405 return build3 (COND_EXPR, type, cond, rhs, lhs);
408 /* Add condition NC to the predicate list of basic block BB. LOOP is
409 the loop to be if-converted. */
411 static inline void
412 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
414 tree bc, *tp;
416 if (is_true_predicate (nc))
417 return;
419 if (!is_predicated (bb))
421 /* If dominance tells us this basic block is always executed, don't
422 record any predicates for it. */
423 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
424 return;
426 bc = nc;
428 else
430 bc = bb_predicate (bb);
431 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
432 if (is_true_predicate (bc))
434 reset_bb_predicate (bb);
435 return;
439 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
440 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
441 tp = &TREE_OPERAND (bc, 0);
442 else
443 tp = &bc;
444 if (!is_gimple_condexpr (*tp))
446 gimple_seq stmts;
447 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
448 add_bb_predicate_gimplified_stmts (bb, stmts);
450 set_bb_predicate (bb, bc);
453 /* Add the condition COND to the previous condition PREV_COND, and add
454 this to the predicate list of the destination of edge E. LOOP is
455 the loop to be if-converted. */
457 static void
458 add_to_dst_predicate_list (struct loop *loop, edge e,
459 tree prev_cond, tree cond)
461 if (!flow_bb_inside_loop_p (loop, e->dest))
462 return;
464 if (!is_true_predicate (prev_cond))
465 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
466 prev_cond, cond);
468 add_to_predicate_list (loop, e->dest, cond);
471 /* Return true if one of the successor edges of BB exits LOOP. */
473 static bool
474 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
476 edge e;
477 edge_iterator ei;
479 FOR_EACH_EDGE (e, ei, bb->succs)
480 if (loop_exit_edge_p (loop, e))
481 return true;
483 return false;
486 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
487 and it belongs to basic block BB.
489 PHI is not if-convertible if:
490 - it has more than 2 arguments.
492 When the flag_tree_loop_if_convert_stores is not set, PHI is not
493 if-convertible if:
494 - a virtual PHI is immediately used in another PHI node,
495 - there is a virtual PHI in a BB other than the loop->header. */
497 static bool
498 if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi,
499 bool any_mask_load_store)
501 if (dump_file && (dump_flags & TDF_DETAILS))
503 fprintf (dump_file, "-------------------------\n");
504 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
507 if (bb != loop->header && gimple_phi_num_args (phi) != 2)
509 if (dump_file && (dump_flags & TDF_DETAILS))
510 fprintf (dump_file, "More than two phi node args.\n");
511 return false;
514 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
515 return true;
517 /* When the flag_tree_loop_if_convert_stores is not set, check
518 that there are no memory writes in the branches of the loop to be
519 if-converted. */
520 if (virtual_operand_p (gimple_phi_result (phi)))
522 imm_use_iterator imm_iter;
523 use_operand_p use_p;
525 if (bb != loop->header)
527 if (dump_file && (dump_flags & TDF_DETAILS))
528 fprintf (dump_file, "Virtual phi not on loop->header.\n");
529 return false;
532 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
534 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
536 if (dump_file && (dump_flags & TDF_DETAILS))
537 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
538 return false;
543 return true;
546 /* Records the status of a data reference. This struct is attached to
547 each DR->aux field. */
549 struct ifc_dr {
550 /* -1 when not initialized, 0 when false, 1 when true. */
551 int written_at_least_once;
553 /* -1 when not initialized, 0 when false, 1 when true. */
554 int rw_unconditionally;
557 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
558 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
559 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
561 /* Returns true when the memory references of STMT are read or written
562 unconditionally. In other words, this function returns true when
563 for every data reference A in STMT there exist other accesses to
564 a data reference with the same base with predicates that add up (OR-up) to
565 the true predicate: this ensures that the data reference A is touched
566 (read or written) on every iteration of the if-converted loop. */
568 static bool
569 memrefs_read_or_written_unconditionally (gimple stmt,
570 vec<data_reference_p> drs)
572 int i, j;
573 data_reference_p a, b;
574 tree ca = bb_predicate (gimple_bb (stmt));
576 for (i = 0; drs.iterate (i, &a); i++)
577 if (DR_STMT (a) == stmt)
579 bool found = false;
580 int x = DR_RW_UNCONDITIONALLY (a);
582 if (x == 0)
583 return false;
585 if (x == 1)
586 continue;
588 for (j = 0; drs.iterate (j, &b); j++)
590 tree ref_base_a = DR_REF (a);
591 tree ref_base_b = DR_REF (b);
593 if (DR_STMT (b) == stmt)
594 continue;
596 while (TREE_CODE (ref_base_a) == COMPONENT_REF
597 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
598 || TREE_CODE (ref_base_a) == REALPART_EXPR)
599 ref_base_a = TREE_OPERAND (ref_base_a, 0);
601 while (TREE_CODE (ref_base_b) == COMPONENT_REF
602 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
603 || TREE_CODE (ref_base_b) == REALPART_EXPR)
604 ref_base_b = TREE_OPERAND (ref_base_b, 0);
606 if (!operand_equal_p (ref_base_a, ref_base_b, 0))
608 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
610 if (DR_RW_UNCONDITIONALLY (b) == 1
611 || is_true_predicate (cb)
612 || is_true_predicate (ca
613 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
615 DR_RW_UNCONDITIONALLY (a) = 1;
616 DR_RW_UNCONDITIONALLY (b) = 1;
617 found = true;
618 break;
623 if (!found)
625 DR_RW_UNCONDITIONALLY (a) = 0;
626 return false;
630 return true;
633 /* Returns true when the memory references of STMT are unconditionally
634 written. In other words, this function returns true when for every
635 data reference A written in STMT, there exist other writes to the
636 same data reference with predicates that add up (OR-up) to the true
637 predicate: this ensures that the data reference A is written on
638 every iteration of the if-converted loop. */
640 static bool
641 write_memrefs_written_at_least_once (gimple stmt,
642 vec<data_reference_p> drs)
644 int i, j;
645 data_reference_p a, b;
646 tree ca = bb_predicate (gimple_bb (stmt));
648 for (i = 0; drs.iterate (i, &a); i++)
649 if (DR_STMT (a) == stmt
650 && DR_IS_WRITE (a))
652 bool found = false;
653 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
655 if (x == 0)
656 return false;
658 if (x == 1)
659 continue;
661 for (j = 0; drs.iterate (j, &b); j++)
662 if (DR_STMT (b) != stmt
663 && DR_IS_WRITE (b)
664 && same_data_refs_base_objects (a, b))
666 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
668 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
669 || is_true_predicate (cb)
670 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
671 ca, cb)))
673 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
674 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
675 found = true;
676 break;
680 if (!found)
682 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
683 return false;
687 return true;
690 /* Return true when the memory references of STMT won't trap in the
691 if-converted code. There are two things that we have to check for:
693 - writes to memory occur to writable memory: if-conversion of
694 memory writes transforms the conditional memory writes into
695 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
696 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
697 be executed at all in the original code, it may be a readonly
698 memory. To check that A is not const-qualified, we check that
699 there exists at least an unconditional write to A in the current
700 function.
702 - reads or writes to memory are valid memory accesses for every
703 iteration. To check that the memory accesses are correctly formed
704 and that we are allowed to read and write in these locations, we
705 check that the memory accesses to be if-converted occur at every
706 iteration unconditionally. */
708 static bool
709 ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
711 return write_memrefs_written_at_least_once (stmt, refs)
712 && memrefs_read_or_written_unconditionally (stmt, refs);
715 /* Wrapper around gimple_could_trap_p refined for the needs of the
716 if-conversion. Try to prove that the memory accesses of STMT could
717 not trap in the innermost loop containing STMT. */
719 static bool
720 ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
722 if (gimple_vuse (stmt)
723 && !gimple_could_trap_p_1 (stmt, false, false)
724 && ifcvt_memrefs_wont_trap (stmt, refs))
725 return false;
727 return gimple_could_trap_p (stmt);
730 /* Return true if STMT could be converted into a masked load or store
731 (conditional load or store based on a mask computed from bb predicate). */
733 static bool
734 ifcvt_can_use_mask_load_store (gimple stmt)
736 tree lhs, ref;
737 machine_mode mode;
738 basic_block bb = gimple_bb (stmt);
739 bool is_load;
741 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
742 || bb->loop_father->dont_vectorize
743 || !gimple_assign_single_p (stmt)
744 || gimple_has_volatile_ops (stmt))
745 return false;
747 /* Check whether this is a load or store. */
748 lhs = gimple_assign_lhs (stmt);
749 if (gimple_store_p (stmt))
751 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
752 return false;
753 is_load = false;
754 ref = lhs;
756 else if (gimple_assign_load_p (stmt))
758 is_load = true;
759 ref = gimple_assign_rhs1 (stmt);
761 else
762 return false;
764 if (may_be_nonaddressable_p (ref))
765 return false;
767 /* Mask should be integer mode of the same size as the load/store
768 mode. */
769 mode = TYPE_MODE (TREE_TYPE (lhs));
770 if (int_mode_for_mode (mode) == BLKmode
771 || VECTOR_MODE_P (mode))
772 return false;
774 if (can_vec_mask_load_store_p (mode, is_load))
775 return true;
777 return false;
780 /* Return true when STMT is if-convertible.
782 GIMPLE_ASSIGN statement is not if-convertible if,
783 - it is not movable,
784 - it could trap,
785 - LHS is not var decl. */
787 static bool
788 if_convertible_gimple_assign_stmt_p (gimple stmt,
789 vec<data_reference_p> refs,
790 bool *any_mask_load_store)
792 tree lhs = gimple_assign_lhs (stmt);
793 basic_block bb;
795 if (dump_file && (dump_flags & TDF_DETAILS))
797 fprintf (dump_file, "-------------------------\n");
798 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
801 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
802 return false;
804 /* Some of these constrains might be too conservative. */
805 if (stmt_ends_bb_p (stmt)
806 || gimple_has_volatile_ops (stmt)
807 || (TREE_CODE (lhs) == SSA_NAME
808 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
809 || gimple_has_side_effects (stmt))
811 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "stmt not suitable for ifcvt\n");
813 return false;
816 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
817 in between if_convertible_loop_p and combine_blocks
818 we can perform loop versioning. */
819 gimple_set_plf (stmt, GF_PLF_2, false);
821 if (flag_tree_loop_if_convert_stores)
823 if (ifcvt_could_trap_p (stmt, refs))
825 if (ifcvt_can_use_mask_load_store (stmt))
827 gimple_set_plf (stmt, GF_PLF_2, true);
828 *any_mask_load_store = true;
829 return true;
831 if (dump_file && (dump_flags & TDF_DETAILS))
832 fprintf (dump_file, "tree could trap...\n");
833 return false;
835 return true;
838 if (gimple_assign_rhs_could_trap_p (stmt))
840 if (ifcvt_can_use_mask_load_store (stmt))
842 gimple_set_plf (stmt, GF_PLF_2, true);
843 *any_mask_load_store = true;
844 return true;
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "tree could trap...\n");
848 return false;
851 bb = gimple_bb (stmt);
853 if (TREE_CODE (lhs) != SSA_NAME
854 && bb != bb->loop_father->header
855 && !bb_with_exit_edge_p (bb->loop_father, bb))
857 if (ifcvt_can_use_mask_load_store (stmt))
859 gimple_set_plf (stmt, GF_PLF_2, true);
860 *any_mask_load_store = true;
861 return true;
863 if (dump_file && (dump_flags & TDF_DETAILS))
865 fprintf (dump_file, "LHS is not var\n");
866 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
868 return false;
871 return true;
874 /* Return true when STMT is if-convertible.
876 A statement is if-convertible if:
877 - it is an if-convertible GIMPLE_ASSIGN,
878 - it is a GIMPLE_LABEL or a GIMPLE_COND. */
880 static bool
881 if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
882 bool *any_mask_load_store)
884 switch (gimple_code (stmt))
886 case GIMPLE_LABEL:
887 case GIMPLE_DEBUG:
888 case GIMPLE_COND:
889 return true;
891 case GIMPLE_ASSIGN:
892 return if_convertible_gimple_assign_stmt_p (stmt, refs,
893 any_mask_load_store);
895 case GIMPLE_CALL:
897 tree fndecl = gimple_call_fndecl (stmt);
898 if (fndecl)
900 int flags = gimple_call_flags (stmt);
901 if ((flags & ECF_CONST)
902 && !(flags & ECF_LOOPING_CONST_OR_PURE)
903 /* We can only vectorize some builtins at the moment,
904 so restrict if-conversion to those. */
905 && DECL_BUILT_IN (fndecl))
906 return true;
908 return false;
911 default:
912 /* Don't know what to do with 'em so don't do anything. */
913 if (dump_file && (dump_flags & TDF_DETAILS))
915 fprintf (dump_file, "don't know what to do\n");
916 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
918 return false;
919 break;
922 return true;
925 /* Return true when BB is if-convertible. This routine does not check
926 basic block's statements and phis.
928 A basic block is not if-convertible if:
929 - it is non-empty and it is after the exit block (in BFS order),
930 - it is after the exit block but before the latch,
931 - its edges are not normal.
933 EXIT_BB is the basic block containing the exit of the LOOP. BB is
934 inside LOOP. */
936 static bool
937 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
939 edge e;
940 edge_iterator ei;
942 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
945 if (EDGE_COUNT (bb->preds) > 2
946 || EDGE_COUNT (bb->succs) > 2)
947 return false;
949 if (exit_bb)
951 if (bb != loop->latch)
953 if (dump_file && (dump_flags & TDF_DETAILS))
954 fprintf (dump_file, "basic block after exit bb but before latch\n");
955 return false;
957 else if (!empty_block_p (bb))
959 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "non empty basic block after exit bb\n");
961 return false;
963 else if (bb == loop->latch
964 && bb != exit_bb
965 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
967 if (dump_file && (dump_flags & TDF_DETAILS))
968 fprintf (dump_file, "latch is not dominated by exit_block\n");
969 return false;
973 /* Be less adventurous and handle only normal edges. */
974 FOR_EACH_EDGE (e, ei, bb->succs)
975 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
977 if (dump_file && (dump_flags & TDF_DETAILS))
978 fprintf (dump_file, "Difficult to handle edges\n");
979 return false;
982 /* At least one incoming edge has to be non-critical as otherwise edge
983 predicates are not equal to basic-block predicates of the edge
984 source. */
985 if (EDGE_COUNT (bb->preds) > 1
986 && bb != loop->header)
988 bool found = false;
989 FOR_EACH_EDGE (e, ei, bb->preds)
990 if (EDGE_COUNT (e->src->succs) == 1)
991 found = true;
992 if (!found)
994 if (dump_file && (dump_flags & TDF_DETAILS))
995 fprintf (dump_file, "only critical predecessors\n");
996 return false;
1000 return true;
1003 /* Return true when all predecessor blocks of BB are visited. The
1004 VISITED bitmap keeps track of the visited blocks. */
1006 static bool
1007 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1009 edge e;
1010 edge_iterator ei;
1011 FOR_EACH_EDGE (e, ei, bb->preds)
1012 if (!bitmap_bit_p (*visited, e->src->index))
1013 return false;
1015 return true;
1018 /* Get body of a LOOP in suitable order for if-conversion. It is
1019 caller's responsibility to deallocate basic block list.
1020 If-conversion suitable order is, breadth first sort (BFS) order
1021 with an additional constraint: select a block only if all its
1022 predecessors are already selected. */
1024 static basic_block *
1025 get_loop_body_in_if_conv_order (const struct loop *loop)
1027 basic_block *blocks, *blocks_in_bfs_order;
1028 basic_block bb;
1029 bitmap visited;
1030 unsigned int index = 0;
1031 unsigned int visited_count = 0;
1033 gcc_assert (loop->num_nodes);
1034 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1036 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1037 visited = BITMAP_ALLOC (NULL);
1039 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1041 index = 0;
1042 while (index < loop->num_nodes)
1044 bb = blocks_in_bfs_order [index];
1046 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1048 free (blocks_in_bfs_order);
1049 BITMAP_FREE (visited);
1050 free (blocks);
1051 return NULL;
1054 if (!bitmap_bit_p (visited, bb->index))
1056 if (pred_blocks_visited_p (bb, &visited)
1057 || bb == loop->header)
1059 /* This block is now visited. */
1060 bitmap_set_bit (visited, bb->index);
1061 blocks[visited_count++] = bb;
1065 index++;
1067 if (index == loop->num_nodes
1068 && visited_count != loop->num_nodes)
1069 /* Not done yet. */
1070 index = 0;
1072 free (blocks_in_bfs_order);
1073 BITMAP_FREE (visited);
1074 return blocks;
1077 /* Returns true when the analysis of the predicates for all the basic
1078 blocks in LOOP succeeded.
1080 predicate_bbs first allocates the predicates of the basic blocks.
1081 These fields are then initialized with the tree expressions
1082 representing the predicates under which a basic block is executed
1083 in the LOOP. As the loop->header is executed at each iteration, it
1084 has the "true" predicate. Other statements executed under a
1085 condition are predicated with that condition, for example
1087 | if (x)
1088 | S1;
1089 | else
1090 | S2;
1092 S1 will be predicated with "x", and
1093 S2 will be predicated with "!x". */
1095 static void
1096 predicate_bbs (loop_p loop)
1098 unsigned int i;
1100 for (i = 0; i < loop->num_nodes; i++)
1101 init_bb_predicate (ifc_bbs[i]);
1103 for (i = 0; i < loop->num_nodes; i++)
1105 basic_block bb = ifc_bbs[i];
1106 tree cond;
1107 gimple stmt;
1109 /* The loop latch is always executed and has no extra conditions
1110 to be processed: skip it. */
1111 if (bb == loop->latch)
1113 reset_bb_predicate (loop->latch);
1114 continue;
1117 cond = bb_predicate (bb);
1118 stmt = last_stmt (bb);
1119 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1121 tree c2;
1122 edge true_edge, false_edge;
1123 location_t loc = gimple_location (stmt);
1124 tree c = fold_build2_loc (loc, gimple_cond_code (stmt),
1125 boolean_type_node,
1126 gimple_cond_lhs (stmt),
1127 gimple_cond_rhs (stmt));
1129 /* Add new condition into destination's predicate list. */
1130 extract_true_false_edges_from_block (gimple_bb (stmt),
1131 &true_edge, &false_edge);
1133 /* If C is true, then TRUE_EDGE is taken. */
1134 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1135 unshare_expr (c));
1137 /* If C is false, then FALSE_EDGE is taken. */
1138 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1139 unshare_expr (c));
1140 add_to_dst_predicate_list (loop, false_edge,
1141 unshare_expr (cond), c2);
1143 cond = NULL_TREE;
1146 /* If current bb has only one successor, then consider it as an
1147 unconditional goto. */
1148 if (single_succ_p (bb))
1150 basic_block bb_n = single_succ (bb);
1152 /* The successor bb inherits the predicate of its
1153 predecessor. If there is no predicate in the predecessor
1154 bb, then consider the successor bb as always executed. */
1155 if (cond == NULL_TREE)
1156 cond = boolean_true_node;
1158 add_to_predicate_list (loop, bb_n, cond);
1162 /* The loop header is always executed. */
1163 reset_bb_predicate (loop->header);
1164 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1165 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1168 /* Return true when LOOP is if-convertible. This is a helper function
1169 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1170 in if_convertible_loop_p. */
1172 static bool
1173 if_convertible_loop_p_1 (struct loop *loop,
1174 vec<loop_p> *loop_nest,
1175 vec<data_reference_p> *refs,
1176 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1178 bool res;
1179 unsigned int i;
1180 basic_block exit_bb = NULL;
1182 /* Don't if-convert the loop when the data dependences cannot be
1183 computed: the loop won't be vectorized in that case. */
1184 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1185 if (!res)
1186 return false;
1188 calculate_dominance_info (CDI_DOMINATORS);
1190 /* Allow statements that can be handled during if-conversion. */
1191 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1192 if (!ifc_bbs)
1194 if (dump_file && (dump_flags & TDF_DETAILS))
1195 fprintf (dump_file, "Irreducible loop\n");
1196 return false;
1199 for (i = 0; i < loop->num_nodes; i++)
1201 basic_block bb = ifc_bbs[i];
1203 if (!if_convertible_bb_p (loop, bb, exit_bb))
1204 return false;
1206 if (bb_with_exit_edge_p (loop, bb))
1207 exit_bb = bb;
1210 for (i = 0; i < loop->num_nodes; i++)
1212 basic_block bb = ifc_bbs[i];
1213 gimple_stmt_iterator gsi;
1215 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1216 switch (gimple_code (gsi_stmt (gsi)))
1218 case GIMPLE_LABEL:
1219 case GIMPLE_ASSIGN:
1220 case GIMPLE_CALL:
1221 case GIMPLE_DEBUG:
1222 case GIMPLE_COND:
1223 break;
1224 default:
1225 return false;
1229 if (flag_tree_loop_if_convert_stores)
1231 data_reference_p dr;
1233 for (i = 0; refs->iterate (i, &dr); i++)
1235 dr->aux = XNEW (struct ifc_dr);
1236 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1237 DR_RW_UNCONDITIONALLY (dr) = -1;
1239 predicate_bbs (loop);
1242 for (i = 0; i < loop->num_nodes; i++)
1244 basic_block bb = ifc_bbs[i];
1245 gimple_stmt_iterator itr;
1247 /* Check the if-convertibility of statements in predicated BBs. */
1248 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1249 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1250 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1251 any_mask_load_store))
1252 return false;
1255 if (flag_tree_loop_if_convert_stores)
1256 for (i = 0; i < loop->num_nodes; i++)
1257 free_bb_predicate (ifc_bbs[i]);
1259 /* Checking PHIs needs to be done after stmts, as the fact whether there
1260 are any masked loads or stores affects the tests. */
1261 for (i = 0; i < loop->num_nodes; i++)
1263 basic_block bb = ifc_bbs[i];
1264 gimple_stmt_iterator itr;
1266 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1267 if (!if_convertible_phi_p (loop, bb, gsi_stmt (itr),
1268 *any_mask_load_store))
1269 return false;
1272 if (dump_file)
1273 fprintf (dump_file, "Applying if-conversion\n");
1275 return true;
1278 /* Return true when LOOP is if-convertible.
1279 LOOP is if-convertible if:
1280 - it is innermost,
1281 - it has two or more basic blocks,
1282 - it has only one exit,
1283 - loop header is not the exit edge,
1284 - if its basic blocks and phi nodes are if convertible. */
1286 static bool
1287 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1289 edge e;
1290 edge_iterator ei;
1291 bool res = false;
1292 vec<data_reference_p> refs;
1293 vec<ddr_p> ddrs;
1295 /* Handle only innermost loop. */
1296 if (!loop || loop->inner)
1298 if (dump_file && (dump_flags & TDF_DETAILS))
1299 fprintf (dump_file, "not innermost loop\n");
1300 return false;
1303 /* If only one block, no need for if-conversion. */
1304 if (loop->num_nodes <= 2)
1306 if (dump_file && (dump_flags & TDF_DETAILS))
1307 fprintf (dump_file, "less than 2 basic blocks\n");
1308 return false;
1311 /* More than one loop exit is too much to handle. */
1312 if (!single_exit (loop))
1314 if (dump_file && (dump_flags & TDF_DETAILS))
1315 fprintf (dump_file, "multiple exits\n");
1316 return false;
1319 /* If one of the loop header's edge is an exit edge then do not
1320 apply if-conversion. */
1321 FOR_EACH_EDGE (e, ei, loop->header->succs)
1322 if (loop_exit_edge_p (loop, e))
1323 return false;
1325 refs.create (5);
1326 ddrs.create (25);
1327 auto_vec<loop_p, 3> loop_nest;
1328 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1329 any_mask_load_store);
1331 if (flag_tree_loop_if_convert_stores)
1333 data_reference_p dr;
1334 unsigned int i;
1336 for (i = 0; refs.iterate (i, &dr); i++)
1337 free (dr->aux);
1340 free_data_refs (refs);
1341 free_dependence_relations (ddrs);
1342 return res;
1345 /* Basic block BB has two predecessors. Using predecessor's bb
1346 predicate, set an appropriate condition COND for the PHI node
1347 replacement. Return the true block whose phi arguments are
1348 selected when cond is true. LOOP is the loop containing the
1349 if-converted region, GSI is the place to insert the code for the
1350 if-conversion. */
1352 static basic_block
1353 find_phi_replacement_condition (basic_block bb, tree *cond,
1354 gimple_stmt_iterator *gsi)
1356 edge first_edge, second_edge;
1357 tree tmp_cond;
1359 gcc_assert (EDGE_COUNT (bb->preds) == 2);
1360 first_edge = EDGE_PRED (bb, 0);
1361 second_edge = EDGE_PRED (bb, 1);
1363 /* Prefer an edge with a not negated predicate.
1364 ??? That's a very weak cost model. */
1365 tmp_cond = bb_predicate (first_edge->src);
1366 gcc_assert (tmp_cond);
1367 if (TREE_CODE (tmp_cond) == TRUTH_NOT_EXPR)
1369 edge tmp_edge;
1371 tmp_edge = first_edge;
1372 first_edge = second_edge;
1373 second_edge = tmp_edge;
1376 /* Check if the edge we take the condition from is not critical.
1377 We know that at least one non-critical edge exists. */
1378 if (EDGE_COUNT (first_edge->src->succs) > 1)
1380 *cond = bb_predicate (second_edge->src);
1382 if (TREE_CODE (*cond) == TRUTH_NOT_EXPR)
1383 *cond = TREE_OPERAND (*cond, 0);
1384 else
1385 /* Select non loop header bb. */
1386 first_edge = second_edge;
1388 else
1389 *cond = bb_predicate (first_edge->src);
1391 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1392 *cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (*cond),
1393 is_gimple_condexpr, NULL_TREE,
1394 true, GSI_SAME_STMT);
1396 return first_edge->src;
1399 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1400 which is in predicated basic block.
1401 In fact, the following PHI pattern is searching:
1402 loop-header:
1403 reduc_1 = PHI <..., reduc_2>
1405 if (...)
1406 reduc_3 = ...
1407 reduc_2 = PHI <reduc_1, reduc_3>
1409 REDUC, OP0 and OP1 contain reduction stmt and its operands. */
1411 static bool
1412 is_cond_scalar_reduction (gimple phi, gimple *reduc,
1413 tree *op0, tree *op1)
1415 tree lhs, r_op1, r_op2;
1416 tree arg_0, arg_1;
1417 gimple stmt;
1418 gimple header_phi = NULL;
1419 enum tree_code reduction_op;
1420 basic_block bb = gimple_bb (phi);
1421 struct loop *loop = bb->loop_father;
1422 edge latch_e = loop_latch_edge (loop);
1423 imm_use_iterator imm_iter;
1424 use_operand_p use_p;
1426 arg_0 = PHI_ARG_DEF (phi, 0);
1427 arg_1 = PHI_ARG_DEF (phi, 1);
1428 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1429 return false;
1431 if (gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1433 lhs = arg_1;
1434 header_phi = SSA_NAME_DEF_STMT (arg_0);
1435 stmt = SSA_NAME_DEF_STMT (arg_1);
1437 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1439 lhs = arg_0;
1440 header_phi = SSA_NAME_DEF_STMT (arg_1);
1441 stmt = SSA_NAME_DEF_STMT (arg_0);
1443 else
1444 return false;
1445 if (gimple_bb (header_phi) != loop->header)
1446 return false;
1448 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1449 return false;
1451 if (gimple_code (stmt) != GIMPLE_ASSIGN
1452 || gimple_has_volatile_ops (stmt))
1453 return false;
1455 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1456 return false;
1458 if (!is_predicated (gimple_bb (stmt)))
1459 return false;
1461 /* Check that stmt-block is predecessor of phi-block. */
1462 if (EDGE_PRED (bb, 0)->src != gimple_bb (stmt)
1463 && EDGE_PRED (bb, 1)->src != gimple_bb (stmt))
1464 return false;
1466 if (!has_single_use (lhs))
1467 return false;
1469 reduction_op = gimple_assign_rhs_code (stmt);
1470 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1471 return false;
1472 r_op1 = gimple_assign_rhs1 (stmt);
1473 r_op2 = gimple_assign_rhs2 (stmt);
1475 /* Make R_OP1 to hold reduction variable. */
1476 if (r_op2 == PHI_RESULT (header_phi)
1477 && reduction_op == PLUS_EXPR)
1479 tree tmp = r_op1;
1480 r_op1 = r_op2;
1481 r_op2 = tmp;
1483 else if (r_op1 != PHI_RESULT (header_phi))
1484 return false;
1486 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1487 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1489 gimple use_stmt = USE_STMT (use_p);
1490 if (is_gimple_debug (use_stmt))
1491 continue;
1492 if (use_stmt == stmt)
1493 continue;
1494 if (gimple_code (use_stmt) != GIMPLE_PHI)
1495 return false;
1498 *op0 = r_op1; *op1 = r_op2;
1499 *reduc = stmt;
1500 return true;
1503 /* Converts conditional scalar reduction into unconditional form, e.g.
1504 bb_4
1505 if (_5 != 0) goto bb_5 else goto bb_6
1506 end_bb_4
1507 bb_5
1508 res_6 = res_13 + 1;
1509 end_bb_5
1510 bb_6
1511 # res_2 = PHI <res_13(4), res_6(5)>
1512 end_bb_6
1514 will be converted into sequence
1515 _ifc__1 = _5 != 0 ? 1 : 0;
1516 res_2 = res_13 + _ifc__1;
1517 Argument SWAP tells that arguments of conditional expression should be
1518 swapped.
1519 Returns rhs of resulting PHI assignment. */
1521 static tree
1522 convert_scalar_cond_reduction (gimple reduc, gimple_stmt_iterator *gsi,
1523 tree cond, tree op0, tree op1, bool swap)
1525 gimple_stmt_iterator stmt_it;
1526 gimple new_assign;
1527 tree rhs;
1528 tree rhs1 = gimple_assign_rhs1 (reduc);
1529 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1530 tree c;
1531 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1533 if (dump_file && (dump_flags & TDF_DETAILS))
1535 fprintf (dump_file, "Found cond scalar reduction.\n");
1536 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1539 /* Build cond expression using COND and constant operand
1540 of reduction rhs. */
1541 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1542 unshare_expr (cond),
1543 swap ? zero : op1,
1544 swap ? op1 : zero);
1546 /* Create assignment stmt and insert it at GSI. */
1547 new_assign = gimple_build_assign (tmp, c);
1548 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1549 /* Build rhs for unconditional increment/decrement. */
1550 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1551 TREE_TYPE (rhs1), op0, tmp);
1553 /* Delete original reduction stmt. */
1554 stmt_it = gsi_for_stmt (reduc);
1555 gsi_remove (&stmt_it, true);
1556 release_defs (reduc);
1557 return rhs;
1560 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1561 This routine does not handle PHI nodes with more than two
1562 arguments.
1564 For example,
1565 S1: A = PHI <x1(1), x2(5)>
1566 is converted into,
1567 S2: A = cond ? x1 : x2;
1569 The generated code is inserted at GSI that points to the top of
1570 basic block's statement list. When COND is true, phi arg from
1571 TRUE_BB is selected. */
1573 static void
1574 predicate_scalar_phi (gimple phi, tree cond,
1575 basic_block true_bb,
1576 gimple_stmt_iterator *gsi)
1578 gimple new_stmt;
1579 basic_block bb;
1580 tree rhs, res, arg, scev;
1582 gcc_assert (gimple_code (phi) == GIMPLE_PHI
1583 && gimple_phi_num_args (phi) == 2);
1585 res = gimple_phi_result (phi);
1586 /* Do not handle virtual phi nodes. */
1587 if (virtual_operand_p (res))
1588 return;
1590 bb = gimple_bb (phi);
1592 if ((arg = degenerate_phi_result (phi))
1593 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1594 res))
1595 && !chrec_contains_undetermined (scev)
1596 && scev != res
1597 && (arg = gimple_phi_arg_def (phi, 0))))
1598 rhs = arg;
1599 else
1601 tree arg_0, arg_1;
1602 tree op0, op1;
1603 gimple reduc;
1605 /* Use condition that is not TRUTH_NOT_EXPR in conditional modify expr. */
1606 if (EDGE_PRED (bb, 1)->src == true_bb)
1608 arg_0 = gimple_phi_arg_def (phi, 1);
1609 arg_1 = gimple_phi_arg_def (phi, 0);
1611 else
1613 arg_0 = gimple_phi_arg_def (phi, 0);
1614 arg_1 = gimple_phi_arg_def (phi, 1);
1616 if (is_cond_scalar_reduction (phi, &reduc, &op0, &op1))
1617 /* Convert reduction stmt into vectorizable form. */
1618 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1619 true_bb != gimple_bb (reduc));
1620 else
1621 /* Build new RHS using selected condition and arguments. */
1622 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1623 arg_0, arg_1);
1626 new_stmt = gimple_build_assign (res, rhs);
1627 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1628 update_stmt (new_stmt);
1630 if (dump_file && (dump_flags & TDF_DETAILS))
1632 fprintf (dump_file, "new phi replacement stmt\n");
1633 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1637 /* Replaces in LOOP all the scalar phi nodes other than those in the
1638 LOOP->header block with conditional modify expressions. */
1640 static void
1641 predicate_all_scalar_phis (struct loop *loop)
1643 basic_block bb;
1644 unsigned int orig_loop_num_nodes = loop->num_nodes;
1645 unsigned int i;
1647 for (i = 1; i < orig_loop_num_nodes; i++)
1649 gimple phi;
1650 tree cond = NULL_TREE;
1651 gimple_stmt_iterator gsi, phi_gsi;
1652 basic_block true_bb = NULL;
1653 bb = ifc_bbs[i];
1655 if (bb == loop->header)
1656 continue;
1658 phi_gsi = gsi_start_phis (bb);
1659 if (gsi_end_p (phi_gsi))
1660 continue;
1662 /* BB has two predecessors. Using predecessor's aux field, set
1663 appropriate condition for the PHI node replacement. */
1664 gsi = gsi_after_labels (bb);
1665 true_bb = find_phi_replacement_condition (bb, &cond, &gsi);
1667 while (!gsi_end_p (phi_gsi))
1669 phi = gsi_stmt (phi_gsi);
1670 predicate_scalar_phi (phi, cond, true_bb, &gsi);
1671 release_phi_node (phi);
1672 gsi_next (&phi_gsi);
1675 set_phi_nodes (bb, NULL);
1679 /* Insert in each basic block of LOOP the statements produced by the
1680 gimplification of the predicates. */
1682 static void
1683 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1685 unsigned int i;
1687 for (i = 0; i < loop->num_nodes; i++)
1689 basic_block bb = ifc_bbs[i];
1690 gimple_seq stmts;
1692 if (!is_predicated (bb))
1694 /* Do not insert statements for a basic block that is not
1695 predicated. Also make sure that the predicate of the
1696 basic block is set to true. */
1697 reset_bb_predicate (bb);
1698 continue;
1701 stmts = bb_predicate_gimplified_stmts (bb);
1702 if (stmts)
1704 if (flag_tree_loop_if_convert_stores
1705 || any_mask_load_store)
1707 /* Insert the predicate of the BB just after the label,
1708 as the if-conversion of memory writes will use this
1709 predicate. */
1710 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1711 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1713 else
1715 /* Insert the predicate of the BB at the end of the BB
1716 as this would reduce the register pressure: the only
1717 use of this predicate will be in successor BBs. */
1718 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1720 if (gsi_end_p (gsi)
1721 || stmt_ends_bb_p (gsi_stmt (gsi)))
1722 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1723 else
1724 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1727 /* Once the sequence is code generated, set it to NULL. */
1728 set_bb_predicate_gimplified_stmts (bb, NULL);
1733 /* Predicate each write to memory in LOOP.
1735 This function transforms control flow constructs containing memory
1736 writes of the form:
1738 | for (i = 0; i < N; i++)
1739 | if (cond)
1740 | A[i] = expr;
1742 into the following form that does not contain control flow:
1744 | for (i = 0; i < N; i++)
1745 | A[i] = cond ? expr : A[i];
1747 The original CFG looks like this:
1749 | bb_0
1750 | i = 0
1751 | end_bb_0
1753 | bb_1
1754 | if (i < N) goto bb_5 else goto bb_2
1755 | end_bb_1
1757 | bb_2
1758 | cond = some_computation;
1759 | if (cond) goto bb_3 else goto bb_4
1760 | end_bb_2
1762 | bb_3
1763 | A[i] = expr;
1764 | goto bb_4
1765 | end_bb_3
1767 | bb_4
1768 | goto bb_1
1769 | end_bb_4
1771 insert_gimplified_predicates inserts the computation of the COND
1772 expression at the beginning of the destination basic block:
1774 | bb_0
1775 | i = 0
1776 | end_bb_0
1778 | bb_1
1779 | if (i < N) goto bb_5 else goto bb_2
1780 | end_bb_1
1782 | bb_2
1783 | cond = some_computation;
1784 | if (cond) goto bb_3 else goto bb_4
1785 | end_bb_2
1787 | bb_3
1788 | cond = some_computation;
1789 | A[i] = expr;
1790 | goto bb_4
1791 | end_bb_3
1793 | bb_4
1794 | goto bb_1
1795 | end_bb_4
1797 predicate_mem_writes is then predicating the memory write as follows:
1799 | bb_0
1800 | i = 0
1801 | end_bb_0
1803 | bb_1
1804 | if (i < N) goto bb_5 else goto bb_2
1805 | end_bb_1
1807 | bb_2
1808 | if (cond) goto bb_3 else goto bb_4
1809 | end_bb_2
1811 | bb_3
1812 | cond = some_computation;
1813 | A[i] = cond ? expr : A[i];
1814 | goto bb_4
1815 | end_bb_3
1817 | bb_4
1818 | goto bb_1
1819 | end_bb_4
1821 and finally combine_blocks removes the basic block boundaries making
1822 the loop vectorizable:
1824 | bb_0
1825 | i = 0
1826 | if (i < N) goto bb_5 else goto bb_1
1827 | end_bb_0
1829 | bb_1
1830 | cond = some_computation;
1831 | A[i] = cond ? expr : A[i];
1832 | if (i < N) goto bb_5 else goto bb_4
1833 | end_bb_1
1835 | bb_4
1836 | goto bb_1
1837 | end_bb_4
1840 static void
1841 predicate_mem_writes (loop_p loop)
1843 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
1845 for (i = 1; i < orig_loop_num_nodes; i++)
1847 gimple_stmt_iterator gsi;
1848 basic_block bb = ifc_bbs[i];
1849 tree cond = bb_predicate (bb);
1850 bool swap;
1851 gimple stmt;
1853 if (is_true_predicate (cond))
1854 continue;
1856 swap = false;
1857 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1859 swap = true;
1860 cond = TREE_OPERAND (cond, 0);
1863 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1864 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
1865 continue;
1866 else if (gimple_plf (stmt, GF_PLF_2))
1868 tree lhs = gimple_assign_lhs (stmt);
1869 tree rhs = gimple_assign_rhs1 (stmt);
1870 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
1871 gimple new_stmt;
1872 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
1874 masktype = build_nonstandard_integer_type (bitsize, 1);
1875 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
1876 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
1877 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
1878 mark_addressable (ref);
1879 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
1880 true, NULL_TREE, true,
1881 GSI_SAME_STMT);
1882 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1883 is_gimple_condexpr, NULL_TREE,
1884 true, GSI_SAME_STMT);
1885 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
1886 mask_op0, mask_op1);
1887 mask = ifc_temp_var (masktype, mask, &gsi);
1888 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
1889 /* Copy points-to info if possible. */
1890 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
1891 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
1892 ref);
1893 if (TREE_CODE (lhs) == SSA_NAME)
1895 new_stmt
1896 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
1897 ptr, mask);
1898 gimple_call_set_lhs (new_stmt, lhs);
1900 else
1901 new_stmt
1902 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
1903 mask, rhs);
1904 gsi_replace (&gsi, new_stmt, true);
1906 else if (gimple_vdef (stmt))
1908 tree lhs = gimple_assign_lhs (stmt);
1909 tree rhs = gimple_assign_rhs1 (stmt);
1910 tree type = TREE_TYPE (lhs);
1912 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
1913 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
1914 if (swap)
1916 tree tem = lhs;
1917 lhs = rhs;
1918 rhs = tem;
1920 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1921 is_gimple_condexpr, NULL_TREE,
1922 true, GSI_SAME_STMT);
1923 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
1924 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
1925 update_stmt (stmt);
1930 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
1931 other than the exit and latch of the LOOP. Also resets the
1932 GIMPLE_DEBUG information. */
1934 static void
1935 remove_conditions_and_labels (loop_p loop)
1937 gimple_stmt_iterator gsi;
1938 unsigned int i;
1940 for (i = 0; i < loop->num_nodes; i++)
1942 basic_block bb = ifc_bbs[i];
1944 if (bb_with_exit_edge_p (loop, bb)
1945 || bb == loop->latch)
1946 continue;
1948 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1949 switch (gimple_code (gsi_stmt (gsi)))
1951 case GIMPLE_COND:
1952 case GIMPLE_LABEL:
1953 gsi_remove (&gsi, true);
1954 break;
1956 case GIMPLE_DEBUG:
1957 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
1958 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1960 gimple_debug_bind_reset_value (gsi_stmt (gsi));
1961 update_stmt (gsi_stmt (gsi));
1963 gsi_next (&gsi);
1964 break;
1966 default:
1967 gsi_next (&gsi);
1972 /* Combine all the basic blocks from LOOP into one or two super basic
1973 blocks. Replace PHI nodes with conditional modify expressions. */
1975 static void
1976 combine_blocks (struct loop *loop, bool any_mask_load_store)
1978 basic_block bb, exit_bb, merge_target_bb;
1979 unsigned int orig_loop_num_nodes = loop->num_nodes;
1980 unsigned int i;
1981 edge e;
1982 edge_iterator ei;
1984 predicate_bbs (loop);
1985 remove_conditions_and_labels (loop);
1986 insert_gimplified_predicates (loop, any_mask_load_store);
1987 predicate_all_scalar_phis (loop);
1989 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
1990 predicate_mem_writes (loop);
1992 /* Merge basic blocks: first remove all the edges in the loop,
1993 except for those from the exit block. */
1994 exit_bb = NULL;
1995 for (i = 0; i < orig_loop_num_nodes; i++)
1997 bb = ifc_bbs[i];
1998 free_bb_predicate (bb);
1999 if (bb_with_exit_edge_p (loop, bb))
2001 gcc_assert (exit_bb == NULL);
2002 exit_bb = bb;
2005 gcc_assert (exit_bb != loop->latch);
2007 for (i = 1; i < orig_loop_num_nodes; i++)
2009 bb = ifc_bbs[i];
2011 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2013 if (e->src == exit_bb)
2014 ei_next (&ei);
2015 else
2016 remove_edge (e);
2020 if (exit_bb != NULL)
2022 if (exit_bb != loop->header)
2024 /* Connect this node to loop header. */
2025 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2026 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2029 /* Redirect non-exit edges to loop->latch. */
2030 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2032 if (!loop_exit_edge_p (loop, e))
2033 redirect_edge_and_branch (e, loop->latch);
2035 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2037 else
2039 /* If the loop does not have an exit, reconnect header and latch. */
2040 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2041 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2044 merge_target_bb = loop->header;
2045 for (i = 1; i < orig_loop_num_nodes; i++)
2047 gimple_stmt_iterator gsi;
2048 gimple_stmt_iterator last;
2050 bb = ifc_bbs[i];
2052 if (bb == exit_bb || bb == loop->latch)
2053 continue;
2055 /* Make stmts member of loop->header. */
2056 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2057 gimple_set_bb (gsi_stmt (gsi), merge_target_bb);
2059 /* Update stmt list. */
2060 last = gsi_last_bb (merge_target_bb);
2061 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2062 set_bb_seq (bb, NULL);
2064 delete_basic_block (bb);
2067 /* If possible, merge loop header to the block with the exit edge.
2068 This reduces the number of basic blocks to two, to please the
2069 vectorizer that handles only loops with two nodes. */
2070 if (exit_bb
2071 && exit_bb != loop->header
2072 && can_merge_blocks_p (loop->header, exit_bb))
2073 merge_blocks (loop->header, exit_bb);
2075 free (ifc_bbs);
2076 ifc_bbs = NULL;
2079 /* Version LOOP before if-converting it, the original loop
2080 will be then if-converted, the new copy of the loop will not,
2081 and the LOOP_VECTORIZED internal call will be guarding which
2082 loop to execute. The vectorizer pass will fold this
2083 internal call into either true or false. */
2085 static bool
2086 version_loop_for_if_conversion (struct loop *loop)
2088 basic_block cond_bb;
2089 tree cond = make_ssa_name (boolean_type_node, NULL);
2090 struct loop *new_loop;
2091 gimple g;
2092 gimple_stmt_iterator gsi;
2094 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2095 build_int_cst (integer_type_node, loop->num),
2096 integer_zero_node);
2097 gimple_call_set_lhs (g, cond);
2099 initialize_original_copy_tables ();
2100 new_loop = loop_version (loop, cond, &cond_bb,
2101 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2102 REG_BR_PROB_BASE, true);
2103 free_original_copy_tables ();
2104 if (new_loop == NULL)
2105 return false;
2106 new_loop->dont_vectorize = true;
2107 new_loop->force_vectorize = false;
2108 gsi = gsi_last_bb (cond_bb);
2109 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2110 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2111 update_ssa (TODO_update_ssa);
2112 return true;
2115 /* If-convert LOOP when it is legal. For the moment this pass has no
2116 profitability analysis. Returns non-zero todo flags when something
2117 changed. */
2119 static unsigned int
2120 tree_if_conversion (struct loop *loop)
2122 unsigned int todo = 0;
2123 ifc_bbs = NULL;
2124 bool any_mask_load_store = false;
2126 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2127 || !dbg_cnt (if_conversion_tree))
2128 goto cleanup;
2130 if (any_mask_load_store
2131 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2132 || loop->dont_vectorize))
2133 goto cleanup;
2135 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2136 goto cleanup;
2138 /* Now all statements are if-convertible. Combine all the basic
2139 blocks into one huge basic block doing the if-conversion
2140 on-the-fly. */
2141 combine_blocks (loop, any_mask_load_store);
2143 todo |= TODO_cleanup_cfg;
2144 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2146 mark_virtual_operands_for_renaming (cfun);
2147 todo |= TODO_update_ssa_only_virtuals;
2150 cleanup:
2151 if (ifc_bbs)
2153 unsigned int i;
2155 for (i = 0; i < loop->num_nodes; i++)
2156 free_bb_predicate (ifc_bbs[i]);
2158 free (ifc_bbs);
2159 ifc_bbs = NULL;
2162 return todo;
2165 /* Tree if-conversion pass management. */
2167 namespace {
2169 const pass_data pass_data_if_conversion =
2171 GIMPLE_PASS, /* type */
2172 "ifcvt", /* name */
2173 OPTGROUP_NONE, /* optinfo_flags */
2174 TV_NONE, /* tv_id */
2175 ( PROP_cfg | PROP_ssa ), /* properties_required */
2176 0, /* properties_provided */
2177 0, /* properties_destroyed */
2178 0, /* todo_flags_start */
2179 0, /* todo_flags_finish */
2182 class pass_if_conversion : public gimple_opt_pass
2184 public:
2185 pass_if_conversion (gcc::context *ctxt)
2186 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2189 /* opt_pass methods: */
2190 virtual bool gate (function *);
2191 virtual unsigned int execute (function *);
2193 }; // class pass_if_conversion
2195 bool
2196 pass_if_conversion::gate (function *fun)
2198 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2199 && flag_tree_loop_if_convert != 0)
2200 || flag_tree_loop_if_convert == 1
2201 || flag_tree_loop_if_convert_stores == 1);
2204 unsigned int
2205 pass_if_conversion::execute (function *fun)
2207 struct loop *loop;
2208 unsigned todo = 0;
2210 if (number_of_loops (fun) <= 1)
2211 return 0;
2213 FOR_EACH_LOOP (loop, 0)
2214 if (flag_tree_loop_if_convert == 1
2215 || flag_tree_loop_if_convert_stores == 1
2216 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2217 && !loop->dont_vectorize))
2218 todo |= tree_if_conversion (loop);
2220 #ifdef ENABLE_CHECKING
2222 basic_block bb;
2223 FOR_EACH_BB_FN (bb, fun)
2224 gcc_assert (!bb->aux);
2226 #endif
2228 return todo;
2231 } // anon namespace
2233 gimple_opt_pass *
2234 make_pass_if_conversion (gcc::context *ctxt)
2236 return new pass_if_conversion (ctxt);