* gcc.dg/store-motion-fgcse-sm.c (dg-final): Cleanup
[official-gcc.git] / gcc / tree-if-conv.c
blob7c3954771e1b9bde48240088a1478eff7a97ab29
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "tm.h"
87 #include "tree.h"
88 #include "stor-layout.h"
89 #include "flags.h"
90 #include "predict.h"
91 #include "vec.h"
92 #include "hashtab.h"
93 #include "hash-set.h"
94 #include "machmode.h"
95 #include "hard-reg-set.h"
96 #include "input.h"
97 #include "function.h"
98 #include "dominance.h"
99 #include "cfg.h"
100 #include "basic-block.h"
101 #include "gimple-pretty-print.h"
102 #include "tree-ssa-alias.h"
103 #include "internal-fn.h"
104 #include "gimple-fold.h"
105 #include "gimple-expr.h"
106 #include "is-a.h"
107 #include "gimple.h"
108 #include "gimplify.h"
109 #include "gimple-iterator.h"
110 #include "gimplify-me.h"
111 #include "gimple-ssa.h"
112 #include "tree-cfg.h"
113 #include "tree-phinodes.h"
114 #include "ssa-iterators.h"
115 #include "stringpool.h"
116 #include "tree-ssanames.h"
117 #include "tree-into-ssa.h"
118 #include "tree-ssa.h"
119 #include "cfgloop.h"
120 #include "tree-chrec.h"
121 #include "tree-data-ref.h"
122 #include "tree-scalar-evolution.h"
123 #include "tree-ssa-loop-ivopts.h"
124 #include "tree-ssa-address.h"
125 #include "tree-pass.h"
126 #include "dbgcnt.h"
127 #include "expr.h"
128 #include "insn-codes.h"
129 #include "optabs.h"
131 /* List of basic blocks in if-conversion-suitable order. */
132 static basic_block *ifc_bbs;
134 /* Structure used to predicate basic blocks. This is attached to the
135 ->aux field of the BBs in the loop to be if-converted. */
136 typedef struct bb_predicate_s {
138 /* The condition under which this basic block is executed. */
139 tree predicate;
141 /* PREDICATE is gimplified, and the sequence of statements is
142 recorded here, in order to avoid the duplication of computations
143 that occur in previous conditions. See PR44483. */
144 gimple_seq predicate_gimplified_stmts;
145 } *bb_predicate_p;
147 /* Returns true when the basic block BB has a predicate. */
149 static inline bool
150 bb_has_predicate (basic_block bb)
152 return bb->aux != NULL;
155 /* Returns the gimplified predicate for basic block BB. */
157 static inline tree
158 bb_predicate (basic_block bb)
160 return ((bb_predicate_p) bb->aux)->predicate;
163 /* Sets the gimplified predicate COND for basic block BB. */
165 static inline void
166 set_bb_predicate (basic_block bb, tree cond)
168 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
169 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
170 || is_gimple_condexpr (cond));
171 ((bb_predicate_p) bb->aux)->predicate = cond;
174 /* Returns the sequence of statements of the gimplification of the
175 predicate for basic block BB. */
177 static inline gimple_seq
178 bb_predicate_gimplified_stmts (basic_block bb)
180 return ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts;
183 /* Sets the sequence of statements STMTS of the gimplification of the
184 predicate for basic block BB. */
186 static inline void
187 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
189 ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts = stmts;
192 /* Adds the sequence of statements STMTS to the sequence of statements
193 of the predicate for basic block BB. */
195 static inline void
196 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
198 gimple_seq_add_seq
199 (&(((bb_predicate_p) bb->aux)->predicate_gimplified_stmts), stmts);
202 /* Initializes to TRUE the predicate of basic block BB. */
204 static inline void
205 init_bb_predicate (basic_block bb)
207 bb->aux = XNEW (struct bb_predicate_s);
208 set_bb_predicate_gimplified_stmts (bb, NULL);
209 set_bb_predicate (bb, boolean_true_node);
212 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
213 but don't actually free it. */
215 static inline void
216 release_bb_predicate (basic_block bb)
218 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
219 if (stmts)
221 gimple_stmt_iterator i;
223 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
224 free_stmt_operands (cfun, gsi_stmt (i));
225 set_bb_predicate_gimplified_stmts (bb, NULL);
229 /* Free the predicate of basic block BB. */
231 static inline void
232 free_bb_predicate (basic_block bb)
234 if (!bb_has_predicate (bb))
235 return;
237 release_bb_predicate (bb);
238 free (bb->aux);
239 bb->aux = NULL;
242 /* Reinitialize predicate of BB with the true predicate. */
244 static inline void
245 reset_bb_predicate (basic_block bb)
247 if (!bb_has_predicate (bb))
248 init_bb_predicate (bb);
249 else
251 release_bb_predicate (bb);
252 set_bb_predicate (bb, boolean_true_node);
256 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
257 the expression EXPR. Inserts the statement created for this
258 computation before GSI and leaves the iterator GSI at the same
259 statement. */
261 static tree
262 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
264 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
265 gimple stmt = gimple_build_assign (new_name, expr);
266 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
267 return new_name;
270 /* Return true when COND is a true predicate. */
272 static inline bool
273 is_true_predicate (tree cond)
275 return (cond == NULL_TREE
276 || cond == boolean_true_node
277 || integer_onep (cond));
280 /* Returns true when BB has a predicate that is not trivial: true or
281 NULL_TREE. */
283 static inline bool
284 is_predicated (basic_block bb)
286 return !is_true_predicate (bb_predicate (bb));
289 /* Parses the predicate COND and returns its comparison code and
290 operands OP0 and OP1. */
292 static enum tree_code
293 parse_predicate (tree cond, tree *op0, tree *op1)
295 gimple s;
297 if (TREE_CODE (cond) == SSA_NAME
298 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
300 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
302 *op0 = gimple_assign_rhs1 (s);
303 *op1 = gimple_assign_rhs2 (s);
304 return gimple_assign_rhs_code (s);
307 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
309 tree op = gimple_assign_rhs1 (s);
310 tree type = TREE_TYPE (op);
311 enum tree_code code = parse_predicate (op, op0, op1);
313 return code == ERROR_MARK ? ERROR_MARK
314 : invert_tree_comparison (code, HONOR_NANS (TYPE_MODE (type)));
317 return ERROR_MARK;
320 if (TREE_CODE_CLASS (TREE_CODE (cond)) == tcc_comparison)
322 *op0 = TREE_OPERAND (cond, 0);
323 *op1 = TREE_OPERAND (cond, 1);
324 return TREE_CODE (cond);
327 return ERROR_MARK;
330 /* Returns the fold of predicate C1 OR C2 at location LOC. */
332 static tree
333 fold_or_predicates (location_t loc, tree c1, tree c2)
335 tree op1a, op1b, op2a, op2b;
336 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
337 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
339 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
341 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
342 code2, op2a, op2b);
343 if (t)
344 return t;
347 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
350 /* Returns true if N is either a constant or a SSA_NAME. */
352 static bool
353 constant_or_ssa_name (tree n)
355 switch (TREE_CODE (n))
357 case SSA_NAME:
358 case INTEGER_CST:
359 case REAL_CST:
360 case COMPLEX_CST:
361 case VECTOR_CST:
362 return true;
363 default:
364 return false;
368 /* Returns either a COND_EXPR or the folded expression if the folded
369 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
370 a constant or a SSA_NAME. */
372 static tree
373 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
375 tree rhs1, lhs1, cond_expr;
376 cond_expr = fold_ternary (COND_EXPR, type, cond,
377 rhs, lhs);
379 if (cond_expr == NULL_TREE)
380 return build3 (COND_EXPR, type, cond, rhs, lhs);
382 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
384 if (constant_or_ssa_name (cond_expr))
385 return cond_expr;
387 if (TREE_CODE (cond_expr) == ABS_EXPR)
389 rhs1 = TREE_OPERAND (cond_expr, 1);
390 STRIP_USELESS_TYPE_CONVERSION (rhs1);
391 if (constant_or_ssa_name (rhs1))
392 return build1 (ABS_EXPR, type, rhs1);
395 if (TREE_CODE (cond_expr) == MIN_EXPR
396 || TREE_CODE (cond_expr) == MAX_EXPR)
398 lhs1 = TREE_OPERAND (cond_expr, 0);
399 STRIP_USELESS_TYPE_CONVERSION (lhs1);
400 rhs1 = TREE_OPERAND (cond_expr, 1);
401 STRIP_USELESS_TYPE_CONVERSION (rhs1);
402 if (constant_or_ssa_name (rhs1)
403 && constant_or_ssa_name (lhs1))
404 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
406 return build3 (COND_EXPR, type, cond, rhs, lhs);
409 /* Add condition NC to the predicate list of basic block BB. LOOP is
410 the loop to be if-converted. Use predicate of cd-equivalent block
411 for join bb if it exists: we call basic blocks bb1 and bb2
412 cd-equivalent if they are executed under the same condition. */
414 static inline void
415 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
417 tree bc, *tp;
418 basic_block dom_bb;
420 if (is_true_predicate (nc))
421 return;
423 /* If dominance tells us this basic block is always executed,
424 don't record any predicates for it. */
425 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
426 return;
428 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
429 /* We use notion of cd equivalence to get simpler predicate for
430 join block, e.g. if join block has 2 predecessors with predicates
431 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
432 p1 & p2 | p1 & !p2. */
433 if (dom_bb != loop->header
434 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
436 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
437 bc = bb_predicate (dom_bb);
438 gcc_assert (!is_true_predicate (bc));
439 set_bb_predicate (bb, bc);
440 if (dump_file && (dump_flags & TDF_DETAILS))
441 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
442 dom_bb->index, bb->index);
443 return;
446 if (!is_predicated (bb))
447 bc = nc;
448 else
450 bc = bb_predicate (bb);
451 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
452 if (is_true_predicate (bc))
454 reset_bb_predicate (bb);
455 return;
459 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
460 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
461 tp = &TREE_OPERAND (bc, 0);
462 else
463 tp = &bc;
464 if (!is_gimple_condexpr (*tp))
466 gimple_seq stmts;
467 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
468 add_bb_predicate_gimplified_stmts (bb, stmts);
470 set_bb_predicate (bb, bc);
473 /* Add the condition COND to the previous condition PREV_COND, and add
474 this to the predicate list of the destination of edge E. LOOP is
475 the loop to be if-converted. */
477 static void
478 add_to_dst_predicate_list (struct loop *loop, edge e,
479 tree prev_cond, tree cond)
481 if (!flow_bb_inside_loop_p (loop, e->dest))
482 return;
484 if (!is_true_predicate (prev_cond))
485 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
486 prev_cond, cond);
488 add_to_predicate_list (loop, e->dest, cond);
491 /* Return true if one of the successor edges of BB exits LOOP. */
493 static bool
494 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
496 edge e;
497 edge_iterator ei;
499 FOR_EACH_EDGE (e, ei, bb->succs)
500 if (loop_exit_edge_p (loop, e))
501 return true;
503 return false;
506 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
507 and it belongs to basic block BB.
509 PHI is not if-convertible if:
510 - it has more than 2 arguments.
512 When the flag_tree_loop_if_convert_stores is not set, PHI is not
513 if-convertible if:
514 - a virtual PHI is immediately used in another PHI node,
515 - there is a virtual PHI in a BB other than the loop->header. */
517 static bool
518 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
519 bool any_mask_load_store)
521 if (dump_file && (dump_flags & TDF_DETAILS))
523 fprintf (dump_file, "-------------------------\n");
524 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
527 if (bb != loop->header && gimple_phi_num_args (phi) != 2)
529 if (dump_file && (dump_flags & TDF_DETAILS))
530 fprintf (dump_file, "More than two phi node args.\n");
531 return false;
534 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
535 return true;
537 /* When the flag_tree_loop_if_convert_stores is not set, check
538 that there are no memory writes in the branches of the loop to be
539 if-converted. */
540 if (virtual_operand_p (gimple_phi_result (phi)))
542 imm_use_iterator imm_iter;
543 use_operand_p use_p;
545 if (bb != loop->header)
547 if (dump_file && (dump_flags & TDF_DETAILS))
548 fprintf (dump_file, "Virtual phi not on loop->header.\n");
549 return false;
552 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
554 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
556 if (dump_file && (dump_flags & TDF_DETAILS))
557 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
558 return false;
563 return true;
566 /* Records the status of a data reference. This struct is attached to
567 each DR->aux field. */
569 struct ifc_dr {
570 /* -1 when not initialized, 0 when false, 1 when true. */
571 int written_at_least_once;
573 /* -1 when not initialized, 0 when false, 1 when true. */
574 int rw_unconditionally;
577 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
578 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
579 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
581 /* Returns true when the memory references of STMT are read or written
582 unconditionally. In other words, this function returns true when
583 for every data reference A in STMT there exist other accesses to
584 a data reference with the same base with predicates that add up (OR-up) to
585 the true predicate: this ensures that the data reference A is touched
586 (read or written) on every iteration of the if-converted loop. */
588 static bool
589 memrefs_read_or_written_unconditionally (gimple stmt,
590 vec<data_reference_p> drs)
592 int i, j;
593 data_reference_p a, b;
594 tree ca = bb_predicate (gimple_bb (stmt));
596 for (i = 0; drs.iterate (i, &a); i++)
597 if (DR_STMT (a) == stmt)
599 bool found = false;
600 int x = DR_RW_UNCONDITIONALLY (a);
602 if (x == 0)
603 return false;
605 if (x == 1)
606 continue;
608 for (j = 0; drs.iterate (j, &b); j++)
610 tree ref_base_a = DR_REF (a);
611 tree ref_base_b = DR_REF (b);
613 if (DR_STMT (b) == stmt)
614 continue;
616 while (TREE_CODE (ref_base_a) == COMPONENT_REF
617 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
618 || TREE_CODE (ref_base_a) == REALPART_EXPR)
619 ref_base_a = TREE_OPERAND (ref_base_a, 0);
621 while (TREE_CODE (ref_base_b) == COMPONENT_REF
622 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
623 || TREE_CODE (ref_base_b) == REALPART_EXPR)
624 ref_base_b = TREE_OPERAND (ref_base_b, 0);
626 if (!operand_equal_p (ref_base_a, ref_base_b, 0))
628 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
630 if (DR_RW_UNCONDITIONALLY (b) == 1
631 || is_true_predicate (cb)
632 || is_true_predicate (ca
633 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
635 DR_RW_UNCONDITIONALLY (a) = 1;
636 DR_RW_UNCONDITIONALLY (b) = 1;
637 found = true;
638 break;
643 if (!found)
645 DR_RW_UNCONDITIONALLY (a) = 0;
646 return false;
650 return true;
653 /* Returns true when the memory references of STMT are unconditionally
654 written. In other words, this function returns true when for every
655 data reference A written in STMT, there exist other writes to the
656 same data reference with predicates that add up (OR-up) to the true
657 predicate: this ensures that the data reference A is written on
658 every iteration of the if-converted loop. */
660 static bool
661 write_memrefs_written_at_least_once (gimple stmt,
662 vec<data_reference_p> drs)
664 int i, j;
665 data_reference_p a, b;
666 tree ca = bb_predicate (gimple_bb (stmt));
668 for (i = 0; drs.iterate (i, &a); i++)
669 if (DR_STMT (a) == stmt
670 && DR_IS_WRITE (a))
672 bool found = false;
673 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
675 if (x == 0)
676 return false;
678 if (x == 1)
679 continue;
681 for (j = 0; drs.iterate (j, &b); j++)
682 if (DR_STMT (b) != stmt
683 && DR_IS_WRITE (b)
684 && same_data_refs_base_objects (a, b))
686 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
688 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
689 || is_true_predicate (cb)
690 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
691 ca, cb)))
693 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
694 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
695 found = true;
696 break;
700 if (!found)
702 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
703 return false;
707 return true;
710 /* Return true when the memory references of STMT won't trap in the
711 if-converted code. There are two things that we have to check for:
713 - writes to memory occur to writable memory: if-conversion of
714 memory writes transforms the conditional memory writes into
715 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
716 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
717 be executed at all in the original code, it may be a readonly
718 memory. To check that A is not const-qualified, we check that
719 there exists at least an unconditional write to A in the current
720 function.
722 - reads or writes to memory are valid memory accesses for every
723 iteration. To check that the memory accesses are correctly formed
724 and that we are allowed to read and write in these locations, we
725 check that the memory accesses to be if-converted occur at every
726 iteration unconditionally. */
728 static bool
729 ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
731 return write_memrefs_written_at_least_once (stmt, refs)
732 && memrefs_read_or_written_unconditionally (stmt, refs);
735 /* Wrapper around gimple_could_trap_p refined for the needs of the
736 if-conversion. Try to prove that the memory accesses of STMT could
737 not trap in the innermost loop containing STMT. */
739 static bool
740 ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
742 if (gimple_vuse (stmt)
743 && !gimple_could_trap_p_1 (stmt, false, false)
744 && ifcvt_memrefs_wont_trap (stmt, refs))
745 return false;
747 return gimple_could_trap_p (stmt);
750 /* Return true if STMT could be converted into a masked load or store
751 (conditional load or store based on a mask computed from bb predicate). */
753 static bool
754 ifcvt_can_use_mask_load_store (gimple stmt)
756 tree lhs, ref;
757 machine_mode mode;
758 basic_block bb = gimple_bb (stmt);
759 bool is_load;
761 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
762 || bb->loop_father->dont_vectorize
763 || !gimple_assign_single_p (stmt)
764 || gimple_has_volatile_ops (stmt))
765 return false;
767 /* Check whether this is a load or store. */
768 lhs = gimple_assign_lhs (stmt);
769 if (gimple_store_p (stmt))
771 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
772 return false;
773 is_load = false;
774 ref = lhs;
776 else if (gimple_assign_load_p (stmt))
778 is_load = true;
779 ref = gimple_assign_rhs1 (stmt);
781 else
782 return false;
784 if (may_be_nonaddressable_p (ref))
785 return false;
787 /* Mask should be integer mode of the same size as the load/store
788 mode. */
789 mode = TYPE_MODE (TREE_TYPE (lhs));
790 if (int_mode_for_mode (mode) == BLKmode
791 || VECTOR_MODE_P (mode))
792 return false;
794 if (can_vec_mask_load_store_p (mode, is_load))
795 return true;
797 return false;
800 /* Return true when STMT is if-convertible.
802 GIMPLE_ASSIGN statement is not if-convertible if,
803 - it is not movable,
804 - it could trap,
805 - LHS is not var decl. */
807 static bool
808 if_convertible_gimple_assign_stmt_p (gimple stmt,
809 vec<data_reference_p> refs,
810 bool *any_mask_load_store)
812 tree lhs = gimple_assign_lhs (stmt);
813 basic_block bb;
815 if (dump_file && (dump_flags & TDF_DETAILS))
817 fprintf (dump_file, "-------------------------\n");
818 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
821 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
822 return false;
824 /* Some of these constrains might be too conservative. */
825 if (stmt_ends_bb_p (stmt)
826 || gimple_has_volatile_ops (stmt)
827 || (TREE_CODE (lhs) == SSA_NAME
828 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
829 || gimple_has_side_effects (stmt))
831 if (dump_file && (dump_flags & TDF_DETAILS))
832 fprintf (dump_file, "stmt not suitable for ifcvt\n");
833 return false;
836 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
837 in between if_convertible_loop_p and combine_blocks
838 we can perform loop versioning. */
839 gimple_set_plf (stmt, GF_PLF_2, false);
841 if (flag_tree_loop_if_convert_stores)
843 if (ifcvt_could_trap_p (stmt, refs))
845 if (ifcvt_can_use_mask_load_store (stmt))
847 gimple_set_plf (stmt, GF_PLF_2, true);
848 *any_mask_load_store = true;
849 return true;
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "tree could trap...\n");
853 return false;
855 return true;
858 if (gimple_assign_rhs_could_trap_p (stmt))
860 if (ifcvt_can_use_mask_load_store (stmt))
862 gimple_set_plf (stmt, GF_PLF_2, true);
863 *any_mask_load_store = true;
864 return true;
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "tree could trap...\n");
868 return false;
871 bb = gimple_bb (stmt);
873 if (TREE_CODE (lhs) != SSA_NAME
874 && bb != bb->loop_father->header
875 && !bb_with_exit_edge_p (bb->loop_father, bb))
877 if (ifcvt_can_use_mask_load_store (stmt))
879 gimple_set_plf (stmt, GF_PLF_2, true);
880 *any_mask_load_store = true;
881 return true;
883 if (dump_file && (dump_flags & TDF_DETAILS))
885 fprintf (dump_file, "LHS is not var\n");
886 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
888 return false;
891 return true;
894 /* Return true when STMT is if-convertible.
896 A statement is if-convertible if:
897 - it is an if-convertible GIMPLE_ASSIGN,
898 - it is a GIMPLE_LABEL or a GIMPLE_COND. */
900 static bool
901 if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
902 bool *any_mask_load_store)
904 switch (gimple_code (stmt))
906 case GIMPLE_LABEL:
907 case GIMPLE_DEBUG:
908 case GIMPLE_COND:
909 return true;
911 case GIMPLE_ASSIGN:
912 return if_convertible_gimple_assign_stmt_p (stmt, refs,
913 any_mask_load_store);
915 case GIMPLE_CALL:
917 tree fndecl = gimple_call_fndecl (stmt);
918 if (fndecl)
920 int flags = gimple_call_flags (stmt);
921 if ((flags & ECF_CONST)
922 && !(flags & ECF_LOOPING_CONST_OR_PURE)
923 /* We can only vectorize some builtins at the moment,
924 so restrict if-conversion to those. */
925 && DECL_BUILT_IN (fndecl))
926 return true;
928 return false;
931 default:
932 /* Don't know what to do with 'em so don't do anything. */
933 if (dump_file && (dump_flags & TDF_DETAILS))
935 fprintf (dump_file, "don't know what to do\n");
936 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
938 return false;
939 break;
942 return true;
945 /* Return true when BB is if-convertible. This routine does not check
946 basic block's statements and phis.
948 A basic block is not if-convertible if:
949 - it is non-empty and it is after the exit block (in BFS order),
950 - it is after the exit block but before the latch,
951 - its edges are not normal.
953 EXIT_BB is the basic block containing the exit of the LOOP. BB is
954 inside LOOP. */
956 static bool
957 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
959 edge e;
960 edge_iterator ei;
962 if (dump_file && (dump_flags & TDF_DETAILS))
963 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
965 if (EDGE_COUNT (bb->preds) > 2
966 || EDGE_COUNT (bb->succs) > 2)
967 return false;
969 if (exit_bb)
971 if (bb != loop->latch)
973 if (dump_file && (dump_flags & TDF_DETAILS))
974 fprintf (dump_file, "basic block after exit bb but before latch\n");
975 return false;
977 else if (!empty_block_p (bb))
979 if (dump_file && (dump_flags & TDF_DETAILS))
980 fprintf (dump_file, "non empty basic block after exit bb\n");
981 return false;
983 else if (bb == loop->latch
984 && bb != exit_bb
985 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "latch is not dominated by exit_block\n");
989 return false;
993 /* Be less adventurous and handle only normal edges. */
994 FOR_EACH_EDGE (e, ei, bb->succs)
995 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
997 if (dump_file && (dump_flags & TDF_DETAILS))
998 fprintf (dump_file, "Difficult to handle edges\n");
999 return false;
1002 /* At least one incoming edge has to be non-critical as otherwise edge
1003 predicates are not equal to basic-block predicates of the edge
1004 source. */
1005 if (EDGE_COUNT (bb->preds) > 1
1006 && bb != loop->header)
1008 bool found = false;
1009 FOR_EACH_EDGE (e, ei, bb->preds)
1010 if (EDGE_COUNT (e->src->succs) == 1)
1011 found = true;
1012 if (!found)
1014 if (dump_file && (dump_flags & TDF_DETAILS))
1015 fprintf (dump_file, "only critical predecessors\n");
1016 return false;
1020 return true;
1023 /* Return true when all predecessor blocks of BB are visited. The
1024 VISITED bitmap keeps track of the visited blocks. */
1026 static bool
1027 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1029 edge e;
1030 edge_iterator ei;
1031 FOR_EACH_EDGE (e, ei, bb->preds)
1032 if (!bitmap_bit_p (*visited, e->src->index))
1033 return false;
1035 return true;
1038 /* Get body of a LOOP in suitable order for if-conversion. It is
1039 caller's responsibility to deallocate basic block list.
1040 If-conversion suitable order is, breadth first sort (BFS) order
1041 with an additional constraint: select a block only if all its
1042 predecessors are already selected. */
1044 static basic_block *
1045 get_loop_body_in_if_conv_order (const struct loop *loop)
1047 basic_block *blocks, *blocks_in_bfs_order;
1048 basic_block bb;
1049 bitmap visited;
1050 unsigned int index = 0;
1051 unsigned int visited_count = 0;
1053 gcc_assert (loop->num_nodes);
1054 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1056 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1057 visited = BITMAP_ALLOC (NULL);
1059 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1061 index = 0;
1062 while (index < loop->num_nodes)
1064 bb = blocks_in_bfs_order [index];
1066 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1068 free (blocks_in_bfs_order);
1069 BITMAP_FREE (visited);
1070 free (blocks);
1071 return NULL;
1074 if (!bitmap_bit_p (visited, bb->index))
1076 if (pred_blocks_visited_p (bb, &visited)
1077 || bb == loop->header)
1079 /* This block is now visited. */
1080 bitmap_set_bit (visited, bb->index);
1081 blocks[visited_count++] = bb;
1085 index++;
1087 if (index == loop->num_nodes
1088 && visited_count != loop->num_nodes)
1089 /* Not done yet. */
1090 index = 0;
1092 free (blocks_in_bfs_order);
1093 BITMAP_FREE (visited);
1094 return blocks;
1097 /* Returns true when the analysis of the predicates for all the basic
1098 blocks in LOOP succeeded.
1100 predicate_bbs first allocates the predicates of the basic blocks.
1101 These fields are then initialized with the tree expressions
1102 representing the predicates under which a basic block is executed
1103 in the LOOP. As the loop->header is executed at each iteration, it
1104 has the "true" predicate. Other statements executed under a
1105 condition are predicated with that condition, for example
1107 | if (x)
1108 | S1;
1109 | else
1110 | S2;
1112 S1 will be predicated with "x", and
1113 S2 will be predicated with "!x". */
1115 static void
1116 predicate_bbs (loop_p loop)
1118 unsigned int i;
1120 for (i = 0; i < loop->num_nodes; i++)
1121 init_bb_predicate (ifc_bbs[i]);
1123 for (i = 0; i < loop->num_nodes; i++)
1125 basic_block bb = ifc_bbs[i];
1126 tree cond;
1127 gimple stmt;
1129 /* The loop latch is always executed and has no extra conditions
1130 to be processed: skip it. */
1131 if (bb == loop->latch)
1133 reset_bb_predicate (loop->latch);
1134 continue;
1137 cond = bb_predicate (bb);
1138 stmt = last_stmt (bb);
1139 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1141 tree c2;
1142 edge true_edge, false_edge;
1143 location_t loc = gimple_location (stmt);
1144 tree c = fold_build2_loc (loc, gimple_cond_code (stmt),
1145 boolean_type_node,
1146 gimple_cond_lhs (stmt),
1147 gimple_cond_rhs (stmt));
1149 /* Add new condition into destination's predicate list. */
1150 extract_true_false_edges_from_block (gimple_bb (stmt),
1151 &true_edge, &false_edge);
1153 /* If C is true, then TRUE_EDGE is taken. */
1154 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1155 unshare_expr (c));
1157 /* If C is false, then FALSE_EDGE is taken. */
1158 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1159 unshare_expr (c));
1160 add_to_dst_predicate_list (loop, false_edge,
1161 unshare_expr (cond), c2);
1163 cond = NULL_TREE;
1166 /* If current bb has only one successor, then consider it as an
1167 unconditional goto. */
1168 if (single_succ_p (bb))
1170 basic_block bb_n = single_succ (bb);
1172 /* The successor bb inherits the predicate of its
1173 predecessor. If there is no predicate in the predecessor
1174 bb, then consider the successor bb as always executed. */
1175 if (cond == NULL_TREE)
1176 cond = boolean_true_node;
1178 add_to_predicate_list (loop, bb_n, cond);
1182 /* The loop header is always executed. */
1183 reset_bb_predicate (loop->header);
1184 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1185 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1188 /* Return true when LOOP is if-convertible. This is a helper function
1189 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1190 in if_convertible_loop_p. */
1192 static bool
1193 if_convertible_loop_p_1 (struct loop *loop,
1194 vec<loop_p> *loop_nest,
1195 vec<data_reference_p> *refs,
1196 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1198 bool res;
1199 unsigned int i;
1200 basic_block exit_bb = NULL;
1202 /* Don't if-convert the loop when the data dependences cannot be
1203 computed: the loop won't be vectorized in that case. */
1204 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1205 if (!res)
1206 return false;
1208 calculate_dominance_info (CDI_DOMINATORS);
1209 calculate_dominance_info (CDI_POST_DOMINATORS);
1211 /* Allow statements that can be handled during if-conversion. */
1212 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1213 if (!ifc_bbs)
1215 if (dump_file && (dump_flags & TDF_DETAILS))
1216 fprintf (dump_file, "Irreducible loop\n");
1217 return false;
1220 for (i = 0; i < loop->num_nodes; i++)
1222 basic_block bb = ifc_bbs[i];
1224 if (!if_convertible_bb_p (loop, bb, exit_bb))
1225 return false;
1227 if (bb_with_exit_edge_p (loop, bb))
1228 exit_bb = bb;
1231 for (i = 0; i < loop->num_nodes; i++)
1233 basic_block bb = ifc_bbs[i];
1234 gimple_stmt_iterator gsi;
1236 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1237 switch (gimple_code (gsi_stmt (gsi)))
1239 case GIMPLE_LABEL:
1240 case GIMPLE_ASSIGN:
1241 case GIMPLE_CALL:
1242 case GIMPLE_DEBUG:
1243 case GIMPLE_COND:
1244 break;
1245 default:
1246 return false;
1250 if (flag_tree_loop_if_convert_stores)
1252 data_reference_p dr;
1254 for (i = 0; refs->iterate (i, &dr); i++)
1256 dr->aux = XNEW (struct ifc_dr);
1257 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1258 DR_RW_UNCONDITIONALLY (dr) = -1;
1260 predicate_bbs (loop);
1263 for (i = 0; i < loop->num_nodes; i++)
1265 basic_block bb = ifc_bbs[i];
1266 gimple_stmt_iterator itr;
1268 /* Check the if-convertibility of statements in predicated BBs. */
1269 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1270 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1271 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1272 any_mask_load_store))
1273 return false;
1276 if (flag_tree_loop_if_convert_stores)
1277 for (i = 0; i < loop->num_nodes; i++)
1278 free_bb_predicate (ifc_bbs[i]);
1280 /* Checking PHIs needs to be done after stmts, as the fact whether there
1281 are any masked loads or stores affects the tests. */
1282 for (i = 0; i < loop->num_nodes; i++)
1284 basic_block bb = ifc_bbs[i];
1285 gphi_iterator itr;
1287 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1288 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1289 *any_mask_load_store))
1290 return false;
1293 if (dump_file)
1294 fprintf (dump_file, "Applying if-conversion\n");
1296 return true;
1299 /* Return true when LOOP is if-convertible.
1300 LOOP is if-convertible if:
1301 - it is innermost,
1302 - it has two or more basic blocks,
1303 - it has only one exit,
1304 - loop header is not the exit edge,
1305 - if its basic blocks and phi nodes are if convertible. */
1307 static bool
1308 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1310 edge e;
1311 edge_iterator ei;
1312 bool res = false;
1313 vec<data_reference_p> refs;
1314 vec<ddr_p> ddrs;
1316 /* Handle only innermost loop. */
1317 if (!loop || loop->inner)
1319 if (dump_file && (dump_flags & TDF_DETAILS))
1320 fprintf (dump_file, "not innermost loop\n");
1321 return false;
1324 /* If only one block, no need for if-conversion. */
1325 if (loop->num_nodes <= 2)
1327 if (dump_file && (dump_flags & TDF_DETAILS))
1328 fprintf (dump_file, "less than 2 basic blocks\n");
1329 return false;
1332 /* More than one loop exit is too much to handle. */
1333 if (!single_exit (loop))
1335 if (dump_file && (dump_flags & TDF_DETAILS))
1336 fprintf (dump_file, "multiple exits\n");
1337 return false;
1340 /* If one of the loop header's edge is an exit edge then do not
1341 apply if-conversion. */
1342 FOR_EACH_EDGE (e, ei, loop->header->succs)
1343 if (loop_exit_edge_p (loop, e))
1344 return false;
1346 refs.create (5);
1347 ddrs.create (25);
1348 auto_vec<loop_p, 3> loop_nest;
1349 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1350 any_mask_load_store);
1352 if (flag_tree_loop_if_convert_stores)
1354 data_reference_p dr;
1355 unsigned int i;
1357 for (i = 0; refs.iterate (i, &dr); i++)
1358 free (dr->aux);
1361 free_data_refs (refs);
1362 free_dependence_relations (ddrs);
1363 return res;
1366 /* Basic block BB has two predecessors. Using predecessor's bb
1367 predicate, set an appropriate condition COND for the PHI node
1368 replacement. Return the true block whose phi arguments are
1369 selected when cond is true. LOOP is the loop containing the
1370 if-converted region, GSI is the place to insert the code for the
1371 if-conversion. */
1373 static basic_block
1374 find_phi_replacement_condition (basic_block bb, tree *cond,
1375 gimple_stmt_iterator *gsi)
1377 edge first_edge, second_edge;
1378 tree tmp_cond;
1380 gcc_assert (EDGE_COUNT (bb->preds) == 2);
1381 first_edge = EDGE_PRED (bb, 0);
1382 second_edge = EDGE_PRED (bb, 1);
1384 /* Prefer an edge with a not negated predicate.
1385 ??? That's a very weak cost model. */
1386 tmp_cond = bb_predicate (first_edge->src);
1387 gcc_assert (tmp_cond);
1388 if (TREE_CODE (tmp_cond) == TRUTH_NOT_EXPR)
1390 edge tmp_edge;
1392 tmp_edge = first_edge;
1393 first_edge = second_edge;
1394 second_edge = tmp_edge;
1397 /* Check if the edge we take the condition from is not critical.
1398 We know that at least one non-critical edge exists. */
1399 if (EDGE_COUNT (first_edge->src->succs) > 1)
1401 *cond = bb_predicate (second_edge->src);
1403 if (TREE_CODE (*cond) == TRUTH_NOT_EXPR)
1404 *cond = TREE_OPERAND (*cond, 0);
1405 else
1406 /* Select non loop header bb. */
1407 first_edge = second_edge;
1409 else
1410 *cond = bb_predicate (first_edge->src);
1412 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1413 *cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (*cond),
1414 is_gimple_condexpr, NULL_TREE,
1415 true, GSI_SAME_STMT);
1417 return first_edge->src;
1420 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1421 which is in predicated basic block.
1422 In fact, the following PHI pattern is searching:
1423 loop-header:
1424 reduc_1 = PHI <..., reduc_2>
1426 if (...)
1427 reduc_3 = ...
1428 reduc_2 = PHI <reduc_1, reduc_3>
1430 REDUC, OP0 and OP1 contain reduction stmt and its operands. */
1432 static bool
1433 is_cond_scalar_reduction (gimple phi, gimple *reduc,
1434 tree *op0, tree *op1)
1436 tree lhs, r_op1, r_op2;
1437 tree arg_0, arg_1;
1438 gimple stmt;
1439 gimple header_phi = NULL;
1440 enum tree_code reduction_op;
1441 basic_block bb = gimple_bb (phi);
1442 struct loop *loop = bb->loop_father;
1443 edge latch_e = loop_latch_edge (loop);
1444 imm_use_iterator imm_iter;
1445 use_operand_p use_p;
1447 arg_0 = PHI_ARG_DEF (phi, 0);
1448 arg_1 = PHI_ARG_DEF (phi, 1);
1449 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1450 return false;
1452 if (gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1454 lhs = arg_1;
1455 header_phi = SSA_NAME_DEF_STMT (arg_0);
1456 stmt = SSA_NAME_DEF_STMT (arg_1);
1458 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1460 lhs = arg_0;
1461 header_phi = SSA_NAME_DEF_STMT (arg_1);
1462 stmt = SSA_NAME_DEF_STMT (arg_0);
1464 else
1465 return false;
1466 if (gimple_bb (header_phi) != loop->header)
1467 return false;
1469 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1470 return false;
1472 if (gimple_code (stmt) != GIMPLE_ASSIGN
1473 || gimple_has_volatile_ops (stmt))
1474 return false;
1476 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1477 return false;
1479 if (!is_predicated (gimple_bb (stmt)))
1480 return false;
1482 /* Check that stmt-block is predecessor of phi-block. */
1483 if (EDGE_PRED (bb, 0)->src != gimple_bb (stmt)
1484 && EDGE_PRED (bb, 1)->src != gimple_bb (stmt))
1485 return false;
1487 if (!has_single_use (lhs))
1488 return false;
1490 reduction_op = gimple_assign_rhs_code (stmt);
1491 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1492 return false;
1493 r_op1 = gimple_assign_rhs1 (stmt);
1494 r_op2 = gimple_assign_rhs2 (stmt);
1496 /* Make R_OP1 to hold reduction variable. */
1497 if (r_op2 == PHI_RESULT (header_phi)
1498 && reduction_op == PLUS_EXPR)
1500 tree tmp = r_op1;
1501 r_op1 = r_op2;
1502 r_op2 = tmp;
1504 else if (r_op1 != PHI_RESULT (header_phi))
1505 return false;
1507 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1508 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1510 gimple use_stmt = USE_STMT (use_p);
1511 if (is_gimple_debug (use_stmt))
1512 continue;
1513 if (use_stmt == stmt)
1514 continue;
1515 if (gimple_code (use_stmt) != GIMPLE_PHI)
1516 return false;
1519 *op0 = r_op1; *op1 = r_op2;
1520 *reduc = stmt;
1521 return true;
1524 /* Converts conditional scalar reduction into unconditional form, e.g.
1525 bb_4
1526 if (_5 != 0) goto bb_5 else goto bb_6
1527 end_bb_4
1528 bb_5
1529 res_6 = res_13 + 1;
1530 end_bb_5
1531 bb_6
1532 # res_2 = PHI <res_13(4), res_6(5)>
1533 end_bb_6
1535 will be converted into sequence
1536 _ifc__1 = _5 != 0 ? 1 : 0;
1537 res_2 = res_13 + _ifc__1;
1538 Argument SWAP tells that arguments of conditional expression should be
1539 swapped.
1540 Returns rhs of resulting PHI assignment. */
1542 static tree
1543 convert_scalar_cond_reduction (gimple reduc, gimple_stmt_iterator *gsi,
1544 tree cond, tree op0, tree op1, bool swap)
1546 gimple_stmt_iterator stmt_it;
1547 gimple new_assign;
1548 tree rhs;
1549 tree rhs1 = gimple_assign_rhs1 (reduc);
1550 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1551 tree c;
1552 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1554 if (dump_file && (dump_flags & TDF_DETAILS))
1556 fprintf (dump_file, "Found cond scalar reduction.\n");
1557 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1560 /* Build cond expression using COND and constant operand
1561 of reduction rhs. */
1562 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1563 unshare_expr (cond),
1564 swap ? zero : op1,
1565 swap ? op1 : zero);
1567 /* Create assignment stmt and insert it at GSI. */
1568 new_assign = gimple_build_assign (tmp, c);
1569 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1570 /* Build rhs for unconditional increment/decrement. */
1571 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1572 TREE_TYPE (rhs1), op0, tmp);
1574 /* Delete original reduction stmt. */
1575 stmt_it = gsi_for_stmt (reduc);
1576 gsi_remove (&stmt_it, true);
1577 release_defs (reduc);
1578 return rhs;
1581 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1582 This routine does not handle PHI nodes with more than two
1583 arguments.
1585 For example,
1586 S1: A = PHI <x1(1), x2(5)>
1587 is converted into,
1588 S2: A = cond ? x1 : x2;
1590 The generated code is inserted at GSI that points to the top of
1591 basic block's statement list. When COND is true, phi arg from
1592 TRUE_BB is selected. */
1594 static void
1595 predicate_scalar_phi (gphi *phi, tree cond,
1596 basic_block true_bb,
1597 gimple_stmt_iterator *gsi)
1599 gimple new_stmt;
1600 basic_block bb;
1601 tree rhs, res, arg, scev;
1603 gcc_assert (gimple_code (phi) == GIMPLE_PHI
1604 && gimple_phi_num_args (phi) == 2);
1606 res = gimple_phi_result (phi);
1607 /* Do not handle virtual phi nodes. */
1608 if (virtual_operand_p (res))
1609 return;
1611 bb = gimple_bb (phi);
1613 if ((arg = degenerate_phi_result (phi))
1614 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1615 res))
1616 && !chrec_contains_undetermined (scev)
1617 && scev != res
1618 && (arg = gimple_phi_arg_def (phi, 0))))
1619 rhs = arg;
1620 else
1622 tree arg_0, arg_1;
1623 tree op0, op1;
1624 gimple reduc;
1626 /* Use condition that is not TRUTH_NOT_EXPR in conditional modify expr. */
1627 if (EDGE_PRED (bb, 1)->src == true_bb)
1629 arg_0 = gimple_phi_arg_def (phi, 1);
1630 arg_1 = gimple_phi_arg_def (phi, 0);
1632 else
1634 arg_0 = gimple_phi_arg_def (phi, 0);
1635 arg_1 = gimple_phi_arg_def (phi, 1);
1637 if (is_cond_scalar_reduction (phi, &reduc, &op0, &op1))
1638 /* Convert reduction stmt into vectorizable form. */
1639 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1640 true_bb != gimple_bb (reduc));
1641 else
1642 /* Build new RHS using selected condition and arguments. */
1643 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1644 arg_0, arg_1);
1647 new_stmt = gimple_build_assign (res, rhs);
1648 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1649 update_stmt (new_stmt);
1651 if (dump_file && (dump_flags & TDF_DETAILS))
1653 fprintf (dump_file, "new phi replacement stmt\n");
1654 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1658 /* Replaces in LOOP all the scalar phi nodes other than those in the
1659 LOOP->header block with conditional modify expressions. */
1661 static void
1662 predicate_all_scalar_phis (struct loop *loop)
1664 basic_block bb;
1665 unsigned int orig_loop_num_nodes = loop->num_nodes;
1666 unsigned int i;
1668 for (i = 1; i < orig_loop_num_nodes; i++)
1670 gphi *phi;
1671 tree cond = NULL_TREE;
1672 gimple_stmt_iterator gsi;
1673 gphi_iterator phi_gsi;
1674 basic_block true_bb = NULL;
1675 bb = ifc_bbs[i];
1677 if (bb == loop->header)
1678 continue;
1680 phi_gsi = gsi_start_phis (bb);
1681 if (gsi_end_p (phi_gsi))
1682 continue;
1684 /* BB has two predecessors. Using predecessor's aux field, set
1685 appropriate condition for the PHI node replacement. */
1686 gsi = gsi_after_labels (bb);
1687 true_bb = find_phi_replacement_condition (bb, &cond, &gsi);
1689 while (!gsi_end_p (phi_gsi))
1691 phi = phi_gsi.phi ();
1692 predicate_scalar_phi (phi, cond, true_bb, &gsi);
1693 release_phi_node (phi);
1694 gsi_next (&phi_gsi);
1697 set_phi_nodes (bb, NULL);
1701 /* Insert in each basic block of LOOP the statements produced by the
1702 gimplification of the predicates. */
1704 static void
1705 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1707 unsigned int i;
1709 for (i = 0; i < loop->num_nodes; i++)
1711 basic_block bb = ifc_bbs[i];
1712 gimple_seq stmts;
1714 if (!is_predicated (bb))
1716 /* Do not insert statements for a basic block that is not
1717 predicated. Also make sure that the predicate of the
1718 basic block is set to true. */
1719 reset_bb_predicate (bb);
1720 continue;
1723 stmts = bb_predicate_gimplified_stmts (bb);
1724 if (stmts)
1726 if (flag_tree_loop_if_convert_stores
1727 || any_mask_load_store)
1729 /* Insert the predicate of the BB just after the label,
1730 as the if-conversion of memory writes will use this
1731 predicate. */
1732 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1733 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1735 else
1737 /* Insert the predicate of the BB at the end of the BB
1738 as this would reduce the register pressure: the only
1739 use of this predicate will be in successor BBs. */
1740 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1742 if (gsi_end_p (gsi)
1743 || stmt_ends_bb_p (gsi_stmt (gsi)))
1744 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1745 else
1746 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1749 /* Once the sequence is code generated, set it to NULL. */
1750 set_bb_predicate_gimplified_stmts (bb, NULL);
1755 /* Predicate each write to memory in LOOP.
1757 This function transforms control flow constructs containing memory
1758 writes of the form:
1760 | for (i = 0; i < N; i++)
1761 | if (cond)
1762 | A[i] = expr;
1764 into the following form that does not contain control flow:
1766 | for (i = 0; i < N; i++)
1767 | A[i] = cond ? expr : A[i];
1769 The original CFG looks like this:
1771 | bb_0
1772 | i = 0
1773 | end_bb_0
1775 | bb_1
1776 | if (i < N) goto bb_5 else goto bb_2
1777 | end_bb_1
1779 | bb_2
1780 | cond = some_computation;
1781 | if (cond) goto bb_3 else goto bb_4
1782 | end_bb_2
1784 | bb_3
1785 | A[i] = expr;
1786 | goto bb_4
1787 | end_bb_3
1789 | bb_4
1790 | goto bb_1
1791 | end_bb_4
1793 insert_gimplified_predicates inserts the computation of the COND
1794 expression at the beginning of the destination basic block:
1796 | bb_0
1797 | i = 0
1798 | end_bb_0
1800 | bb_1
1801 | if (i < N) goto bb_5 else goto bb_2
1802 | end_bb_1
1804 | bb_2
1805 | cond = some_computation;
1806 | if (cond) goto bb_3 else goto bb_4
1807 | end_bb_2
1809 | bb_3
1810 | cond = some_computation;
1811 | A[i] = expr;
1812 | goto bb_4
1813 | end_bb_3
1815 | bb_4
1816 | goto bb_1
1817 | end_bb_4
1819 predicate_mem_writes is then predicating the memory write as follows:
1821 | bb_0
1822 | i = 0
1823 | end_bb_0
1825 | bb_1
1826 | if (i < N) goto bb_5 else goto bb_2
1827 | end_bb_1
1829 | bb_2
1830 | if (cond) goto bb_3 else goto bb_4
1831 | end_bb_2
1833 | bb_3
1834 | cond = some_computation;
1835 | A[i] = cond ? expr : A[i];
1836 | goto bb_4
1837 | end_bb_3
1839 | bb_4
1840 | goto bb_1
1841 | end_bb_4
1843 and finally combine_blocks removes the basic block boundaries making
1844 the loop vectorizable:
1846 | bb_0
1847 | i = 0
1848 | if (i < N) goto bb_5 else goto bb_1
1849 | end_bb_0
1851 | bb_1
1852 | cond = some_computation;
1853 | A[i] = cond ? expr : A[i];
1854 | if (i < N) goto bb_5 else goto bb_4
1855 | end_bb_1
1857 | bb_4
1858 | goto bb_1
1859 | end_bb_4
1862 static void
1863 predicate_mem_writes (loop_p loop)
1865 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
1867 for (i = 1; i < orig_loop_num_nodes; i++)
1869 gimple_stmt_iterator gsi;
1870 basic_block bb = ifc_bbs[i];
1871 tree cond = bb_predicate (bb);
1872 bool swap;
1873 gimple stmt;
1875 if (is_true_predicate (cond))
1876 continue;
1878 swap = false;
1879 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1881 swap = true;
1882 cond = TREE_OPERAND (cond, 0);
1885 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1886 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
1887 continue;
1888 else if (gimple_plf (stmt, GF_PLF_2))
1890 tree lhs = gimple_assign_lhs (stmt);
1891 tree rhs = gimple_assign_rhs1 (stmt);
1892 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
1893 gimple new_stmt;
1894 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
1896 masktype = build_nonstandard_integer_type (bitsize, 1);
1897 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
1898 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
1899 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
1900 mark_addressable (ref);
1901 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
1902 true, NULL_TREE, true,
1903 GSI_SAME_STMT);
1904 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1905 is_gimple_condexpr, NULL_TREE,
1906 true, GSI_SAME_STMT);
1907 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
1908 mask_op0, mask_op1);
1909 mask = ifc_temp_var (masktype, mask, &gsi);
1910 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
1911 /* Copy points-to info if possible. */
1912 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
1913 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
1914 ref);
1915 if (TREE_CODE (lhs) == SSA_NAME)
1917 new_stmt
1918 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
1919 ptr, mask);
1920 gimple_call_set_lhs (new_stmt, lhs);
1922 else
1923 new_stmt
1924 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
1925 mask, rhs);
1926 gsi_replace (&gsi, new_stmt, true);
1928 else if (gimple_vdef (stmt))
1930 tree lhs = gimple_assign_lhs (stmt);
1931 tree rhs = gimple_assign_rhs1 (stmt);
1932 tree type = TREE_TYPE (lhs);
1934 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
1935 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
1936 if (swap)
1938 tree tem = lhs;
1939 lhs = rhs;
1940 rhs = tem;
1942 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1943 is_gimple_condexpr, NULL_TREE,
1944 true, GSI_SAME_STMT);
1945 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
1946 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
1947 update_stmt (stmt);
1952 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
1953 other than the exit and latch of the LOOP. Also resets the
1954 GIMPLE_DEBUG information. */
1956 static void
1957 remove_conditions_and_labels (loop_p loop)
1959 gimple_stmt_iterator gsi;
1960 unsigned int i;
1962 for (i = 0; i < loop->num_nodes; i++)
1964 basic_block bb = ifc_bbs[i];
1966 if (bb_with_exit_edge_p (loop, bb)
1967 || bb == loop->latch)
1968 continue;
1970 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1971 switch (gimple_code (gsi_stmt (gsi)))
1973 case GIMPLE_COND:
1974 case GIMPLE_LABEL:
1975 gsi_remove (&gsi, true);
1976 break;
1978 case GIMPLE_DEBUG:
1979 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
1980 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1982 gimple_debug_bind_reset_value (gsi_stmt (gsi));
1983 update_stmt (gsi_stmt (gsi));
1985 gsi_next (&gsi);
1986 break;
1988 default:
1989 gsi_next (&gsi);
1994 /* Combine all the basic blocks from LOOP into one or two super basic
1995 blocks. Replace PHI nodes with conditional modify expressions. */
1997 static void
1998 combine_blocks (struct loop *loop, bool any_mask_load_store)
2000 basic_block bb, exit_bb, merge_target_bb;
2001 unsigned int orig_loop_num_nodes = loop->num_nodes;
2002 unsigned int i;
2003 edge e;
2004 edge_iterator ei;
2006 predicate_bbs (loop);
2007 remove_conditions_and_labels (loop);
2008 insert_gimplified_predicates (loop, any_mask_load_store);
2009 predicate_all_scalar_phis (loop);
2011 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2012 predicate_mem_writes (loop);
2014 /* Merge basic blocks: first remove all the edges in the loop,
2015 except for those from the exit block. */
2016 exit_bb = NULL;
2017 for (i = 0; i < orig_loop_num_nodes; i++)
2019 bb = ifc_bbs[i];
2020 free_bb_predicate (bb);
2021 if (bb_with_exit_edge_p (loop, bb))
2023 gcc_assert (exit_bb == NULL);
2024 exit_bb = bb;
2027 gcc_assert (exit_bb != loop->latch);
2029 for (i = 1; i < orig_loop_num_nodes; i++)
2031 bb = ifc_bbs[i];
2033 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2035 if (e->src == exit_bb)
2036 ei_next (&ei);
2037 else
2038 remove_edge (e);
2042 if (exit_bb != NULL)
2044 if (exit_bb != loop->header)
2046 /* Connect this node to loop header. */
2047 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2048 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2051 /* Redirect non-exit edges to loop->latch. */
2052 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2054 if (!loop_exit_edge_p (loop, e))
2055 redirect_edge_and_branch (e, loop->latch);
2057 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2059 else
2061 /* If the loop does not have an exit, reconnect header and latch. */
2062 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2063 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2066 merge_target_bb = loop->header;
2067 for (i = 1; i < orig_loop_num_nodes; i++)
2069 gimple_stmt_iterator gsi;
2070 gimple_stmt_iterator last;
2072 bb = ifc_bbs[i];
2074 if (bb == exit_bb || bb == loop->latch)
2075 continue;
2077 /* Make stmts member of loop->header. */
2078 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2079 gimple_set_bb (gsi_stmt (gsi), merge_target_bb);
2081 /* Update stmt list. */
2082 last = gsi_last_bb (merge_target_bb);
2083 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2084 set_bb_seq (bb, NULL);
2086 delete_basic_block (bb);
2089 /* If possible, merge loop header to the block with the exit edge.
2090 This reduces the number of basic blocks to two, to please the
2091 vectorizer that handles only loops with two nodes. */
2092 if (exit_bb
2093 && exit_bb != loop->header
2094 && can_merge_blocks_p (loop->header, exit_bb))
2095 merge_blocks (loop->header, exit_bb);
2097 free (ifc_bbs);
2098 ifc_bbs = NULL;
2101 /* Version LOOP before if-converting it, the original loop
2102 will be then if-converted, the new copy of the loop will not,
2103 and the LOOP_VECTORIZED internal call will be guarding which
2104 loop to execute. The vectorizer pass will fold this
2105 internal call into either true or false. */
2107 static bool
2108 version_loop_for_if_conversion (struct loop *loop)
2110 basic_block cond_bb;
2111 tree cond = make_ssa_name (boolean_type_node, NULL);
2112 struct loop *new_loop;
2113 gimple g;
2114 gimple_stmt_iterator gsi;
2116 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2117 build_int_cst (integer_type_node, loop->num),
2118 integer_zero_node);
2119 gimple_call_set_lhs (g, cond);
2121 initialize_original_copy_tables ();
2122 new_loop = loop_version (loop, cond, &cond_bb,
2123 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2124 REG_BR_PROB_BASE, true);
2125 free_original_copy_tables ();
2126 if (new_loop == NULL)
2127 return false;
2128 new_loop->dont_vectorize = true;
2129 new_loop->force_vectorize = false;
2130 gsi = gsi_last_bb (cond_bb);
2131 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2132 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2133 update_ssa (TODO_update_ssa);
2134 return true;
2137 /* If-convert LOOP when it is legal. For the moment this pass has no
2138 profitability analysis. Returns non-zero todo flags when something
2139 changed. */
2141 static unsigned int
2142 tree_if_conversion (struct loop *loop)
2144 unsigned int todo = 0;
2145 ifc_bbs = NULL;
2146 bool any_mask_load_store = false;
2148 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2149 || !dbg_cnt (if_conversion_tree))
2150 goto cleanup;
2152 if (any_mask_load_store
2153 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2154 || loop->dont_vectorize))
2155 goto cleanup;
2157 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2158 goto cleanup;
2160 /* Now all statements are if-convertible. Combine all the basic
2161 blocks into one huge basic block doing the if-conversion
2162 on-the-fly. */
2163 combine_blocks (loop, any_mask_load_store);
2165 todo |= TODO_cleanup_cfg;
2166 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2168 mark_virtual_operands_for_renaming (cfun);
2169 todo |= TODO_update_ssa_only_virtuals;
2172 cleanup:
2173 if (ifc_bbs)
2175 unsigned int i;
2177 for (i = 0; i < loop->num_nodes; i++)
2178 free_bb_predicate (ifc_bbs[i]);
2180 free (ifc_bbs);
2181 ifc_bbs = NULL;
2183 free_dominance_info (CDI_POST_DOMINATORS);
2185 return todo;
2188 /* Tree if-conversion pass management. */
2190 namespace {
2192 const pass_data pass_data_if_conversion =
2194 GIMPLE_PASS, /* type */
2195 "ifcvt", /* name */
2196 OPTGROUP_NONE, /* optinfo_flags */
2197 TV_NONE, /* tv_id */
2198 ( PROP_cfg | PROP_ssa ), /* properties_required */
2199 0, /* properties_provided */
2200 0, /* properties_destroyed */
2201 0, /* todo_flags_start */
2202 0, /* todo_flags_finish */
2205 class pass_if_conversion : public gimple_opt_pass
2207 public:
2208 pass_if_conversion (gcc::context *ctxt)
2209 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2212 /* opt_pass methods: */
2213 virtual bool gate (function *);
2214 virtual unsigned int execute (function *);
2216 }; // class pass_if_conversion
2218 bool
2219 pass_if_conversion::gate (function *fun)
2221 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2222 && flag_tree_loop_if_convert != 0)
2223 || flag_tree_loop_if_convert == 1
2224 || flag_tree_loop_if_convert_stores == 1);
2227 unsigned int
2228 pass_if_conversion::execute (function *fun)
2230 struct loop *loop;
2231 unsigned todo = 0;
2233 if (number_of_loops (fun) <= 1)
2234 return 0;
2236 FOR_EACH_LOOP (loop, 0)
2237 if (flag_tree_loop_if_convert == 1
2238 || flag_tree_loop_if_convert_stores == 1
2239 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2240 && !loop->dont_vectorize))
2241 todo |= tree_if_conversion (loop);
2243 #ifdef ENABLE_CHECKING
2245 basic_block bb;
2246 FOR_EACH_BB_FN (bb, fun)
2247 gcc_assert (!bb->aux);
2249 #endif
2251 return todo;
2254 } // anon namespace
2256 gimple_opt_pass *
2257 make_pass_if_conversion (gcc::context *ctxt)
2259 return new pass_if_conversion (ctxt);