2014-12-22 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / gcc / tree-if-conv.c
blob2ec7774d52db40801d681e842a243b1f7474ef10
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "tm.h"
87 #include "tree.h"
88 #include "stor-layout.h"
89 #include "flags.h"
90 #include "predict.h"
91 #include "vec.h"
92 #include "hashtab.h"
93 #include "hash-set.h"
94 #include "machmode.h"
95 #include "hard-reg-set.h"
96 #include "input.h"
97 #include "function.h"
98 #include "dominance.h"
99 #include "cfg.h"
100 #include "basic-block.h"
101 #include "gimple-pretty-print.h"
102 #include "tree-ssa-alias.h"
103 #include "internal-fn.h"
104 #include "gimple-fold.h"
105 #include "gimple-expr.h"
106 #include "is-a.h"
107 #include "gimple.h"
108 #include "gimplify.h"
109 #include "gimple-iterator.h"
110 #include "gimplify-me.h"
111 #include "gimple-ssa.h"
112 #include "tree-cfg.h"
113 #include "tree-phinodes.h"
114 #include "ssa-iterators.h"
115 #include "stringpool.h"
116 #include "tree-ssanames.h"
117 #include "tree-into-ssa.h"
118 #include "tree-ssa.h"
119 #include "cfgloop.h"
120 #include "tree-chrec.h"
121 #include "tree-data-ref.h"
122 #include "tree-scalar-evolution.h"
123 #include "tree-ssa-loop-ivopts.h"
124 #include "tree-ssa-address.h"
125 #include "tree-pass.h"
126 #include "dbgcnt.h"
127 #include "expr.h"
128 #include "insn-codes.h"
129 #include "optabs.h"
131 /* List of basic blocks in if-conversion-suitable order. */
132 static basic_block *ifc_bbs;
134 /* Structure used to predicate basic blocks. This is attached to the
135 ->aux field of the BBs in the loop to be if-converted. */
136 typedef struct bb_predicate_s {
138 /* The condition under which this basic block is executed. */
139 tree predicate;
141 /* PREDICATE is gimplified, and the sequence of statements is
142 recorded here, in order to avoid the duplication of computations
143 that occur in previous conditions. See PR44483. */
144 gimple_seq predicate_gimplified_stmts;
145 } *bb_predicate_p;
147 /* Returns true when the basic block BB has a predicate. */
149 static inline bool
150 bb_has_predicate (basic_block bb)
152 return bb->aux != NULL;
155 /* Returns the gimplified predicate for basic block BB. */
157 static inline tree
158 bb_predicate (basic_block bb)
160 return ((bb_predicate_p) bb->aux)->predicate;
163 /* Sets the gimplified predicate COND for basic block BB. */
165 static inline void
166 set_bb_predicate (basic_block bb, tree cond)
168 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
169 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
170 || is_gimple_condexpr (cond));
171 ((bb_predicate_p) bb->aux)->predicate = cond;
174 /* Returns the sequence of statements of the gimplification of the
175 predicate for basic block BB. */
177 static inline gimple_seq
178 bb_predicate_gimplified_stmts (basic_block bb)
180 return ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts;
183 /* Sets the sequence of statements STMTS of the gimplification of the
184 predicate for basic block BB. */
186 static inline void
187 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
189 ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts = stmts;
192 /* Adds the sequence of statements STMTS to the sequence of statements
193 of the predicate for basic block BB. */
195 static inline void
196 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
198 gimple_seq_add_seq
199 (&(((bb_predicate_p) bb->aux)->predicate_gimplified_stmts), stmts);
202 /* Initializes to TRUE the predicate of basic block BB. */
204 static inline void
205 init_bb_predicate (basic_block bb)
207 bb->aux = XNEW (struct bb_predicate_s);
208 set_bb_predicate_gimplified_stmts (bb, NULL);
209 set_bb_predicate (bb, boolean_true_node);
212 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
213 but don't actually free it. */
215 static inline void
216 release_bb_predicate (basic_block bb)
218 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
219 if (stmts)
221 gimple_stmt_iterator i;
223 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
224 free_stmt_operands (cfun, gsi_stmt (i));
225 set_bb_predicate_gimplified_stmts (bb, NULL);
229 /* Free the predicate of basic block BB. */
231 static inline void
232 free_bb_predicate (basic_block bb)
234 if (!bb_has_predicate (bb))
235 return;
237 release_bb_predicate (bb);
238 free (bb->aux);
239 bb->aux = NULL;
242 /* Reinitialize predicate of BB with the true predicate. */
244 static inline void
245 reset_bb_predicate (basic_block bb)
247 if (!bb_has_predicate (bb))
248 init_bb_predicate (bb);
249 else
251 release_bb_predicate (bb);
252 set_bb_predicate (bb, boolean_true_node);
256 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
257 the expression EXPR. Inserts the statement created for this
258 computation before GSI and leaves the iterator GSI at the same
259 statement. */
261 static tree
262 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
264 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
265 gimple stmt = gimple_build_assign (new_name, expr);
266 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
267 return new_name;
270 /* Return true when COND is a true predicate. */
272 static inline bool
273 is_true_predicate (tree cond)
275 return (cond == NULL_TREE
276 || cond == boolean_true_node
277 || integer_onep (cond));
280 /* Returns true when BB has a predicate that is not trivial: true or
281 NULL_TREE. */
283 static inline bool
284 is_predicated (basic_block bb)
286 return !is_true_predicate (bb_predicate (bb));
289 /* Parses the predicate COND and returns its comparison code and
290 operands OP0 and OP1. */
292 static enum tree_code
293 parse_predicate (tree cond, tree *op0, tree *op1)
295 gimple s;
297 if (TREE_CODE (cond) == SSA_NAME
298 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
300 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
302 *op0 = gimple_assign_rhs1 (s);
303 *op1 = gimple_assign_rhs2 (s);
304 return gimple_assign_rhs_code (s);
307 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
309 tree op = gimple_assign_rhs1 (s);
310 tree type = TREE_TYPE (op);
311 enum tree_code code = parse_predicate (op, op0, op1);
313 return code == ERROR_MARK ? ERROR_MARK
314 : invert_tree_comparison (code, HONOR_NANS (type));
317 return ERROR_MARK;
320 if (TREE_CODE_CLASS (TREE_CODE (cond)) == tcc_comparison)
322 *op0 = TREE_OPERAND (cond, 0);
323 *op1 = TREE_OPERAND (cond, 1);
324 return TREE_CODE (cond);
327 return ERROR_MARK;
330 /* Returns the fold of predicate C1 OR C2 at location LOC. */
332 static tree
333 fold_or_predicates (location_t loc, tree c1, tree c2)
335 tree op1a, op1b, op2a, op2b;
336 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
337 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
339 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
341 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
342 code2, op2a, op2b);
343 if (t)
344 return t;
347 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
350 /* Returns true if N is either a constant or a SSA_NAME. */
352 static bool
353 constant_or_ssa_name (tree n)
355 switch (TREE_CODE (n))
357 case SSA_NAME:
358 case INTEGER_CST:
359 case REAL_CST:
360 case COMPLEX_CST:
361 case VECTOR_CST:
362 return true;
363 default:
364 return false;
368 /* Returns either a COND_EXPR or the folded expression if the folded
369 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
370 a constant or a SSA_NAME. */
372 static tree
373 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
375 tree rhs1, lhs1, cond_expr;
376 cond_expr = fold_ternary (COND_EXPR, type, cond,
377 rhs, lhs);
379 if (cond_expr == NULL_TREE)
380 return build3 (COND_EXPR, type, cond, rhs, lhs);
382 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
384 if (constant_or_ssa_name (cond_expr))
385 return cond_expr;
387 if (TREE_CODE (cond_expr) == ABS_EXPR)
389 rhs1 = TREE_OPERAND (cond_expr, 1);
390 STRIP_USELESS_TYPE_CONVERSION (rhs1);
391 if (constant_or_ssa_name (rhs1))
392 return build1 (ABS_EXPR, type, rhs1);
395 if (TREE_CODE (cond_expr) == MIN_EXPR
396 || TREE_CODE (cond_expr) == MAX_EXPR)
398 lhs1 = TREE_OPERAND (cond_expr, 0);
399 STRIP_USELESS_TYPE_CONVERSION (lhs1);
400 rhs1 = TREE_OPERAND (cond_expr, 1);
401 STRIP_USELESS_TYPE_CONVERSION (rhs1);
402 if (constant_or_ssa_name (rhs1)
403 && constant_or_ssa_name (lhs1))
404 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
406 return build3 (COND_EXPR, type, cond, rhs, lhs);
409 /* Add condition NC to the predicate list of basic block BB. LOOP is
410 the loop to be if-converted. Use predicate of cd-equivalent block
411 for join bb if it exists: we call basic blocks bb1 and bb2
412 cd-equivalent if they are executed under the same condition. */
414 static inline void
415 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
417 tree bc, *tp;
418 basic_block dom_bb;
420 if (is_true_predicate (nc))
421 return;
423 /* If dominance tells us this basic block is always executed,
424 don't record any predicates for it. */
425 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
426 return;
428 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
429 /* We use notion of cd equivalence to get simpler predicate for
430 join block, e.g. if join block has 2 predecessors with predicates
431 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
432 p1 & p2 | p1 & !p2. */
433 if (dom_bb != loop->header
434 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
436 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
437 bc = bb_predicate (dom_bb);
438 if (!is_true_predicate (bc))
439 set_bb_predicate (bb, bc);
440 else
441 gcc_assert (is_true_predicate (bb_predicate (bb)));
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
444 dom_bb->index, bb->index);
445 return;
448 if (!is_predicated (bb))
449 bc = nc;
450 else
452 bc = bb_predicate (bb);
453 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
454 if (is_true_predicate (bc))
456 reset_bb_predicate (bb);
457 return;
461 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
462 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
463 tp = &TREE_OPERAND (bc, 0);
464 else
465 tp = &bc;
466 if (!is_gimple_condexpr (*tp))
468 gimple_seq stmts;
469 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
470 add_bb_predicate_gimplified_stmts (bb, stmts);
472 set_bb_predicate (bb, bc);
475 /* Add the condition COND to the previous condition PREV_COND, and add
476 this to the predicate list of the destination of edge E. LOOP is
477 the loop to be if-converted. */
479 static void
480 add_to_dst_predicate_list (struct loop *loop, edge e,
481 tree prev_cond, tree cond)
483 if (!flow_bb_inside_loop_p (loop, e->dest))
484 return;
486 if (!is_true_predicate (prev_cond))
487 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
488 prev_cond, cond);
490 add_to_predicate_list (loop, e->dest, cond);
493 /* Return true if one of the successor edges of BB exits LOOP. */
495 static bool
496 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
498 edge e;
499 edge_iterator ei;
501 FOR_EACH_EDGE (e, ei, bb->succs)
502 if (loop_exit_edge_p (loop, e))
503 return true;
505 return false;
508 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
509 and it belongs to basic block BB.
511 PHI is not if-convertible if:
512 - it has more than 2 arguments.
514 When the flag_tree_loop_if_convert_stores is not set, PHI is not
515 if-convertible if:
516 - a virtual PHI is immediately used in another PHI node,
517 - there is a virtual PHI in a BB other than the loop->header. */
519 static bool
520 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
521 bool any_mask_load_store)
523 if (dump_file && (dump_flags & TDF_DETAILS))
525 fprintf (dump_file, "-------------------------\n");
526 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
529 if (bb != loop->header && gimple_phi_num_args (phi) != 2)
531 if (dump_file && (dump_flags & TDF_DETAILS))
532 fprintf (dump_file, "More than two phi node args.\n");
533 return false;
536 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
537 return true;
539 /* When the flag_tree_loop_if_convert_stores is not set, check
540 that there are no memory writes in the branches of the loop to be
541 if-converted. */
542 if (virtual_operand_p (gimple_phi_result (phi)))
544 imm_use_iterator imm_iter;
545 use_operand_p use_p;
547 if (bb != loop->header)
549 if (dump_file && (dump_flags & TDF_DETAILS))
550 fprintf (dump_file, "Virtual phi not on loop->header.\n");
551 return false;
554 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
556 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
558 if (dump_file && (dump_flags & TDF_DETAILS))
559 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
560 return false;
565 return true;
568 /* Records the status of a data reference. This struct is attached to
569 each DR->aux field. */
571 struct ifc_dr {
572 /* -1 when not initialized, 0 when false, 1 when true. */
573 int written_at_least_once;
575 /* -1 when not initialized, 0 when false, 1 when true. */
576 int rw_unconditionally;
579 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
580 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
581 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
583 /* Returns true when the memory references of STMT are read or written
584 unconditionally. In other words, this function returns true when
585 for every data reference A in STMT there exist other accesses to
586 a data reference with the same base with predicates that add up (OR-up) to
587 the true predicate: this ensures that the data reference A is touched
588 (read or written) on every iteration of the if-converted loop. */
590 static bool
591 memrefs_read_or_written_unconditionally (gimple stmt,
592 vec<data_reference_p> drs)
594 int i, j;
595 data_reference_p a, b;
596 tree ca = bb_predicate (gimple_bb (stmt));
598 for (i = 0; drs.iterate (i, &a); i++)
599 if (DR_STMT (a) == stmt)
601 bool found = false;
602 int x = DR_RW_UNCONDITIONALLY (a);
604 if (x == 0)
605 return false;
607 if (x == 1)
608 continue;
610 for (j = 0; drs.iterate (j, &b); j++)
612 tree ref_base_a = DR_REF (a);
613 tree ref_base_b = DR_REF (b);
615 if (DR_STMT (b) == stmt)
616 continue;
618 while (TREE_CODE (ref_base_a) == COMPONENT_REF
619 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
620 || TREE_CODE (ref_base_a) == REALPART_EXPR)
621 ref_base_a = TREE_OPERAND (ref_base_a, 0);
623 while (TREE_CODE (ref_base_b) == COMPONENT_REF
624 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
625 || TREE_CODE (ref_base_b) == REALPART_EXPR)
626 ref_base_b = TREE_OPERAND (ref_base_b, 0);
628 if (!operand_equal_p (ref_base_a, ref_base_b, 0))
630 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
632 if (DR_RW_UNCONDITIONALLY (b) == 1
633 || is_true_predicate (cb)
634 || is_true_predicate (ca
635 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
637 DR_RW_UNCONDITIONALLY (a) = 1;
638 DR_RW_UNCONDITIONALLY (b) = 1;
639 found = true;
640 break;
645 if (!found)
647 DR_RW_UNCONDITIONALLY (a) = 0;
648 return false;
652 return true;
655 /* Returns true when the memory references of STMT are unconditionally
656 written. In other words, this function returns true when for every
657 data reference A written in STMT, there exist other writes to the
658 same data reference with predicates that add up (OR-up) to the true
659 predicate: this ensures that the data reference A is written on
660 every iteration of the if-converted loop. */
662 static bool
663 write_memrefs_written_at_least_once (gimple stmt,
664 vec<data_reference_p> drs)
666 int i, j;
667 data_reference_p a, b;
668 tree ca = bb_predicate (gimple_bb (stmt));
670 for (i = 0; drs.iterate (i, &a); i++)
671 if (DR_STMT (a) == stmt
672 && DR_IS_WRITE (a))
674 bool found = false;
675 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
677 if (x == 0)
678 return false;
680 if (x == 1)
681 continue;
683 for (j = 0; drs.iterate (j, &b); j++)
684 if (DR_STMT (b) != stmt
685 && DR_IS_WRITE (b)
686 && same_data_refs_base_objects (a, b))
688 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
690 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
691 || is_true_predicate (cb)
692 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
693 ca, cb)))
695 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
696 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
697 found = true;
698 break;
702 if (!found)
704 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
705 return false;
709 return true;
712 /* Return true when the memory references of STMT won't trap in the
713 if-converted code. There are two things that we have to check for:
715 - writes to memory occur to writable memory: if-conversion of
716 memory writes transforms the conditional memory writes into
717 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
718 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
719 be executed at all in the original code, it may be a readonly
720 memory. To check that A is not const-qualified, we check that
721 there exists at least an unconditional write to A in the current
722 function.
724 - reads or writes to memory are valid memory accesses for every
725 iteration. To check that the memory accesses are correctly formed
726 and that we are allowed to read and write in these locations, we
727 check that the memory accesses to be if-converted occur at every
728 iteration unconditionally. */
730 static bool
731 ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
733 return write_memrefs_written_at_least_once (stmt, refs)
734 && memrefs_read_or_written_unconditionally (stmt, refs);
737 /* Wrapper around gimple_could_trap_p refined for the needs of the
738 if-conversion. Try to prove that the memory accesses of STMT could
739 not trap in the innermost loop containing STMT. */
741 static bool
742 ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
744 if (gimple_vuse (stmt)
745 && !gimple_could_trap_p_1 (stmt, false, false)
746 && ifcvt_memrefs_wont_trap (stmt, refs))
747 return false;
749 return gimple_could_trap_p (stmt);
752 /* Return true if STMT could be converted into a masked load or store
753 (conditional load or store based on a mask computed from bb predicate). */
755 static bool
756 ifcvt_can_use_mask_load_store (gimple stmt)
758 tree lhs, ref;
759 machine_mode mode;
760 basic_block bb = gimple_bb (stmt);
761 bool is_load;
763 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
764 || bb->loop_father->dont_vectorize
765 || !gimple_assign_single_p (stmt)
766 || gimple_has_volatile_ops (stmt))
767 return false;
769 /* Check whether this is a load or store. */
770 lhs = gimple_assign_lhs (stmt);
771 if (gimple_store_p (stmt))
773 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
774 return false;
775 is_load = false;
776 ref = lhs;
778 else if (gimple_assign_load_p (stmt))
780 is_load = true;
781 ref = gimple_assign_rhs1 (stmt);
783 else
784 return false;
786 if (may_be_nonaddressable_p (ref))
787 return false;
789 /* Mask should be integer mode of the same size as the load/store
790 mode. */
791 mode = TYPE_MODE (TREE_TYPE (lhs));
792 if (int_mode_for_mode (mode) == BLKmode
793 || VECTOR_MODE_P (mode))
794 return false;
796 if (can_vec_mask_load_store_p (mode, is_load))
797 return true;
799 return false;
802 /* Return true when STMT is if-convertible.
804 GIMPLE_ASSIGN statement is not if-convertible if,
805 - it is not movable,
806 - it could trap,
807 - LHS is not var decl. */
809 static bool
810 if_convertible_gimple_assign_stmt_p (gimple stmt,
811 vec<data_reference_p> refs,
812 bool *any_mask_load_store)
814 tree lhs = gimple_assign_lhs (stmt);
815 basic_block bb;
817 if (dump_file && (dump_flags & TDF_DETAILS))
819 fprintf (dump_file, "-------------------------\n");
820 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
823 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
824 return false;
826 /* Some of these constrains might be too conservative. */
827 if (stmt_ends_bb_p (stmt)
828 || gimple_has_volatile_ops (stmt)
829 || (TREE_CODE (lhs) == SSA_NAME
830 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
831 || gimple_has_side_effects (stmt))
833 if (dump_file && (dump_flags & TDF_DETAILS))
834 fprintf (dump_file, "stmt not suitable for ifcvt\n");
835 return false;
838 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
839 in between if_convertible_loop_p and combine_blocks
840 we can perform loop versioning. */
841 gimple_set_plf (stmt, GF_PLF_2, false);
843 if (flag_tree_loop_if_convert_stores)
845 if (ifcvt_could_trap_p (stmt, refs))
847 if (ifcvt_can_use_mask_load_store (stmt))
849 gimple_set_plf (stmt, GF_PLF_2, true);
850 *any_mask_load_store = true;
851 return true;
853 if (dump_file && (dump_flags & TDF_DETAILS))
854 fprintf (dump_file, "tree could trap...\n");
855 return false;
857 return true;
860 if (gimple_assign_rhs_could_trap_p (stmt))
862 if (ifcvt_can_use_mask_load_store (stmt))
864 gimple_set_plf (stmt, GF_PLF_2, true);
865 *any_mask_load_store = true;
866 return true;
868 if (dump_file && (dump_flags & TDF_DETAILS))
869 fprintf (dump_file, "tree could trap...\n");
870 return false;
873 bb = gimple_bb (stmt);
875 if (TREE_CODE (lhs) != SSA_NAME
876 && bb != bb->loop_father->header
877 && !bb_with_exit_edge_p (bb->loop_father, bb))
879 if (ifcvt_can_use_mask_load_store (stmt))
881 gimple_set_plf (stmt, GF_PLF_2, true);
882 *any_mask_load_store = true;
883 return true;
885 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "LHS is not var\n");
888 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
890 return false;
893 return true;
896 /* Return true when STMT is if-convertible.
898 A statement is if-convertible if:
899 - it is an if-convertible GIMPLE_ASSIGN,
900 - it is a GIMPLE_LABEL or a GIMPLE_COND. */
902 static bool
903 if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
904 bool *any_mask_load_store)
906 switch (gimple_code (stmt))
908 case GIMPLE_LABEL:
909 case GIMPLE_DEBUG:
910 case GIMPLE_COND:
911 return true;
913 case GIMPLE_ASSIGN:
914 return if_convertible_gimple_assign_stmt_p (stmt, refs,
915 any_mask_load_store);
917 case GIMPLE_CALL:
919 tree fndecl = gimple_call_fndecl (stmt);
920 if (fndecl)
922 int flags = gimple_call_flags (stmt);
923 if ((flags & ECF_CONST)
924 && !(flags & ECF_LOOPING_CONST_OR_PURE)
925 /* We can only vectorize some builtins at the moment,
926 so restrict if-conversion to those. */
927 && DECL_BUILT_IN (fndecl))
928 return true;
930 return false;
933 default:
934 /* Don't know what to do with 'em so don't do anything. */
935 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "don't know what to do\n");
938 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
940 return false;
941 break;
944 return true;
947 /* Return true when BB is if-convertible. This routine does not check
948 basic block's statements and phis.
950 A basic block is not if-convertible if:
951 - it is non-empty and it is after the exit block (in BFS order),
952 - it is after the exit block but before the latch,
953 - its edges are not normal.
955 EXIT_BB is the basic block containing the exit of the LOOP. BB is
956 inside LOOP. */
958 static bool
959 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
961 edge e;
962 edge_iterator ei;
964 if (dump_file && (dump_flags & TDF_DETAILS))
965 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
967 if (EDGE_COUNT (bb->preds) > 2
968 || EDGE_COUNT (bb->succs) > 2)
969 return false;
971 if (exit_bb)
973 if (bb != loop->latch)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "basic block after exit bb but before latch\n");
977 return false;
979 else if (!empty_block_p (bb))
981 if (dump_file && (dump_flags & TDF_DETAILS))
982 fprintf (dump_file, "non empty basic block after exit bb\n");
983 return false;
985 else if (bb == loop->latch
986 && bb != exit_bb
987 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
989 if (dump_file && (dump_flags & TDF_DETAILS))
990 fprintf (dump_file, "latch is not dominated by exit_block\n");
991 return false;
995 /* Be less adventurous and handle only normal edges. */
996 FOR_EACH_EDGE (e, ei, bb->succs)
997 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
999 if (dump_file && (dump_flags & TDF_DETAILS))
1000 fprintf (dump_file, "Difficult to handle edges\n");
1001 return false;
1004 /* At least one incoming edge has to be non-critical as otherwise edge
1005 predicates are not equal to basic-block predicates of the edge
1006 source. */
1007 if (EDGE_COUNT (bb->preds) > 1
1008 && bb != loop->header)
1010 bool found = false;
1011 FOR_EACH_EDGE (e, ei, bb->preds)
1012 if (EDGE_COUNT (e->src->succs) == 1)
1013 found = true;
1014 if (!found)
1016 if (dump_file && (dump_flags & TDF_DETAILS))
1017 fprintf (dump_file, "only critical predecessors\n");
1018 return false;
1022 return true;
1025 /* Return true when all predecessor blocks of BB are visited. The
1026 VISITED bitmap keeps track of the visited blocks. */
1028 static bool
1029 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1031 edge e;
1032 edge_iterator ei;
1033 FOR_EACH_EDGE (e, ei, bb->preds)
1034 if (!bitmap_bit_p (*visited, e->src->index))
1035 return false;
1037 return true;
1040 /* Get body of a LOOP in suitable order for if-conversion. It is
1041 caller's responsibility to deallocate basic block list.
1042 If-conversion suitable order is, breadth first sort (BFS) order
1043 with an additional constraint: select a block only if all its
1044 predecessors are already selected. */
1046 static basic_block *
1047 get_loop_body_in_if_conv_order (const struct loop *loop)
1049 basic_block *blocks, *blocks_in_bfs_order;
1050 basic_block bb;
1051 bitmap visited;
1052 unsigned int index = 0;
1053 unsigned int visited_count = 0;
1055 gcc_assert (loop->num_nodes);
1056 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1058 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1059 visited = BITMAP_ALLOC (NULL);
1061 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1063 index = 0;
1064 while (index < loop->num_nodes)
1066 bb = blocks_in_bfs_order [index];
1068 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1070 free (blocks_in_bfs_order);
1071 BITMAP_FREE (visited);
1072 free (blocks);
1073 return NULL;
1076 if (!bitmap_bit_p (visited, bb->index))
1078 if (pred_blocks_visited_p (bb, &visited)
1079 || bb == loop->header)
1081 /* This block is now visited. */
1082 bitmap_set_bit (visited, bb->index);
1083 blocks[visited_count++] = bb;
1087 index++;
1089 if (index == loop->num_nodes
1090 && visited_count != loop->num_nodes)
1091 /* Not done yet. */
1092 index = 0;
1094 free (blocks_in_bfs_order);
1095 BITMAP_FREE (visited);
1096 return blocks;
1099 /* Returns true when the analysis of the predicates for all the basic
1100 blocks in LOOP succeeded.
1102 predicate_bbs first allocates the predicates of the basic blocks.
1103 These fields are then initialized with the tree expressions
1104 representing the predicates under which a basic block is executed
1105 in the LOOP. As the loop->header is executed at each iteration, it
1106 has the "true" predicate. Other statements executed under a
1107 condition are predicated with that condition, for example
1109 | if (x)
1110 | S1;
1111 | else
1112 | S2;
1114 S1 will be predicated with "x", and
1115 S2 will be predicated with "!x". */
1117 static void
1118 predicate_bbs (loop_p loop)
1120 unsigned int i;
1122 for (i = 0; i < loop->num_nodes; i++)
1123 init_bb_predicate (ifc_bbs[i]);
1125 for (i = 0; i < loop->num_nodes; i++)
1127 basic_block bb = ifc_bbs[i];
1128 tree cond;
1129 gimple stmt;
1131 /* The loop latch is always executed and has no extra conditions
1132 to be processed: skip it. */
1133 if (bb == loop->latch)
1135 reset_bb_predicate (loop->latch);
1136 continue;
1139 cond = bb_predicate (bb);
1140 stmt = last_stmt (bb);
1141 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1143 tree c2;
1144 edge true_edge, false_edge;
1145 location_t loc = gimple_location (stmt);
1146 tree c = fold_build2_loc (loc, gimple_cond_code (stmt),
1147 boolean_type_node,
1148 gimple_cond_lhs (stmt),
1149 gimple_cond_rhs (stmt));
1151 /* Add new condition into destination's predicate list. */
1152 extract_true_false_edges_from_block (gimple_bb (stmt),
1153 &true_edge, &false_edge);
1155 /* If C is true, then TRUE_EDGE is taken. */
1156 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1157 unshare_expr (c));
1159 /* If C is false, then FALSE_EDGE is taken. */
1160 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1161 unshare_expr (c));
1162 add_to_dst_predicate_list (loop, false_edge,
1163 unshare_expr (cond), c2);
1165 cond = NULL_TREE;
1168 /* If current bb has only one successor, then consider it as an
1169 unconditional goto. */
1170 if (single_succ_p (bb))
1172 basic_block bb_n = single_succ (bb);
1174 /* The successor bb inherits the predicate of its
1175 predecessor. If there is no predicate in the predecessor
1176 bb, then consider the successor bb as always executed. */
1177 if (cond == NULL_TREE)
1178 cond = boolean_true_node;
1180 add_to_predicate_list (loop, bb_n, cond);
1184 /* The loop header is always executed. */
1185 reset_bb_predicate (loop->header);
1186 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1187 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1190 /* Return true when LOOP is if-convertible. This is a helper function
1191 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1192 in if_convertible_loop_p. */
1194 static bool
1195 if_convertible_loop_p_1 (struct loop *loop,
1196 vec<loop_p> *loop_nest,
1197 vec<data_reference_p> *refs,
1198 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1200 bool res;
1201 unsigned int i;
1202 basic_block exit_bb = NULL;
1204 /* Don't if-convert the loop when the data dependences cannot be
1205 computed: the loop won't be vectorized in that case. */
1206 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1207 if (!res)
1208 return false;
1210 calculate_dominance_info (CDI_DOMINATORS);
1211 calculate_dominance_info (CDI_POST_DOMINATORS);
1213 /* Allow statements that can be handled during if-conversion. */
1214 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1215 if (!ifc_bbs)
1217 if (dump_file && (dump_flags & TDF_DETAILS))
1218 fprintf (dump_file, "Irreducible loop\n");
1219 return false;
1222 for (i = 0; i < loop->num_nodes; i++)
1224 basic_block bb = ifc_bbs[i];
1226 if (!if_convertible_bb_p (loop, bb, exit_bb))
1227 return false;
1229 if (bb_with_exit_edge_p (loop, bb))
1230 exit_bb = bb;
1233 for (i = 0; i < loop->num_nodes; i++)
1235 basic_block bb = ifc_bbs[i];
1236 gimple_stmt_iterator gsi;
1238 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1239 switch (gimple_code (gsi_stmt (gsi)))
1241 case GIMPLE_LABEL:
1242 case GIMPLE_ASSIGN:
1243 case GIMPLE_CALL:
1244 case GIMPLE_DEBUG:
1245 case GIMPLE_COND:
1246 break;
1247 default:
1248 return false;
1252 if (flag_tree_loop_if_convert_stores)
1254 data_reference_p dr;
1256 for (i = 0; refs->iterate (i, &dr); i++)
1258 dr->aux = XNEW (struct ifc_dr);
1259 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1260 DR_RW_UNCONDITIONALLY (dr) = -1;
1262 predicate_bbs (loop);
1265 for (i = 0; i < loop->num_nodes; i++)
1267 basic_block bb = ifc_bbs[i];
1268 gimple_stmt_iterator itr;
1270 /* Check the if-convertibility of statements in predicated BBs. */
1271 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1272 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1273 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1274 any_mask_load_store))
1275 return false;
1278 if (flag_tree_loop_if_convert_stores)
1279 for (i = 0; i < loop->num_nodes; i++)
1280 free_bb_predicate (ifc_bbs[i]);
1282 /* Checking PHIs needs to be done after stmts, as the fact whether there
1283 are any masked loads or stores affects the tests. */
1284 for (i = 0; i < loop->num_nodes; i++)
1286 basic_block bb = ifc_bbs[i];
1287 gphi_iterator itr;
1289 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1290 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1291 *any_mask_load_store))
1292 return false;
1295 if (dump_file)
1296 fprintf (dump_file, "Applying if-conversion\n");
1298 return true;
1301 /* Return true when LOOP is if-convertible.
1302 LOOP is if-convertible if:
1303 - it is innermost,
1304 - it has two or more basic blocks,
1305 - it has only one exit,
1306 - loop header is not the exit edge,
1307 - if its basic blocks and phi nodes are if convertible. */
1309 static bool
1310 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1312 edge e;
1313 edge_iterator ei;
1314 bool res = false;
1315 vec<data_reference_p> refs;
1316 vec<ddr_p> ddrs;
1318 /* Handle only innermost loop. */
1319 if (!loop || loop->inner)
1321 if (dump_file && (dump_flags & TDF_DETAILS))
1322 fprintf (dump_file, "not innermost loop\n");
1323 return false;
1326 /* If only one block, no need for if-conversion. */
1327 if (loop->num_nodes <= 2)
1329 if (dump_file && (dump_flags & TDF_DETAILS))
1330 fprintf (dump_file, "less than 2 basic blocks\n");
1331 return false;
1334 /* More than one loop exit is too much to handle. */
1335 if (!single_exit (loop))
1337 if (dump_file && (dump_flags & TDF_DETAILS))
1338 fprintf (dump_file, "multiple exits\n");
1339 return false;
1342 /* If one of the loop header's edge is an exit edge then do not
1343 apply if-conversion. */
1344 FOR_EACH_EDGE (e, ei, loop->header->succs)
1345 if (loop_exit_edge_p (loop, e))
1346 return false;
1348 refs.create (5);
1349 ddrs.create (25);
1350 auto_vec<loop_p, 3> loop_nest;
1351 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1352 any_mask_load_store);
1354 if (flag_tree_loop_if_convert_stores)
1356 data_reference_p dr;
1357 unsigned int i;
1359 for (i = 0; refs.iterate (i, &dr); i++)
1360 free (dr->aux);
1363 free_data_refs (refs);
1364 free_dependence_relations (ddrs);
1365 return res;
1368 /* Basic block BB has two predecessors. Using predecessor's bb
1369 predicate, set an appropriate condition COND for the PHI node
1370 replacement. Return the true block whose phi arguments are
1371 selected when cond is true. LOOP is the loop containing the
1372 if-converted region, GSI is the place to insert the code for the
1373 if-conversion. */
1375 static basic_block
1376 find_phi_replacement_condition (basic_block bb, tree *cond,
1377 gimple_stmt_iterator *gsi)
1379 edge first_edge, second_edge;
1380 tree tmp_cond;
1382 gcc_assert (EDGE_COUNT (bb->preds) == 2);
1383 first_edge = EDGE_PRED (bb, 0);
1384 second_edge = EDGE_PRED (bb, 1);
1386 /* Prefer an edge with a not negated predicate.
1387 ??? That's a very weak cost model. */
1388 tmp_cond = bb_predicate (first_edge->src);
1389 gcc_assert (tmp_cond);
1390 if (TREE_CODE (tmp_cond) == TRUTH_NOT_EXPR)
1392 edge tmp_edge;
1394 tmp_edge = first_edge;
1395 first_edge = second_edge;
1396 second_edge = tmp_edge;
1399 /* Check if the edge we take the condition from is not critical.
1400 We know that at least one non-critical edge exists. */
1401 if (EDGE_COUNT (first_edge->src->succs) > 1)
1403 *cond = bb_predicate (second_edge->src);
1405 if (TREE_CODE (*cond) == TRUTH_NOT_EXPR)
1406 *cond = TREE_OPERAND (*cond, 0);
1407 else
1408 /* Select non loop header bb. */
1409 first_edge = second_edge;
1411 else
1412 *cond = bb_predicate (first_edge->src);
1414 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1415 *cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (*cond),
1416 is_gimple_condexpr, NULL_TREE,
1417 true, GSI_SAME_STMT);
1419 return first_edge->src;
1422 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1423 which is in predicated basic block.
1424 In fact, the following PHI pattern is searching:
1425 loop-header:
1426 reduc_1 = PHI <..., reduc_2>
1428 if (...)
1429 reduc_3 = ...
1430 reduc_2 = PHI <reduc_1, reduc_3>
1432 REDUC, OP0 and OP1 contain reduction stmt and its operands. */
1434 static bool
1435 is_cond_scalar_reduction (gimple phi, gimple *reduc,
1436 tree *op0, tree *op1)
1438 tree lhs, r_op1, r_op2;
1439 tree arg_0, arg_1;
1440 gimple stmt;
1441 gimple header_phi = NULL;
1442 enum tree_code reduction_op;
1443 basic_block bb = gimple_bb (phi);
1444 struct loop *loop = bb->loop_father;
1445 edge latch_e = loop_latch_edge (loop);
1446 imm_use_iterator imm_iter;
1447 use_operand_p use_p;
1449 arg_0 = PHI_ARG_DEF (phi, 0);
1450 arg_1 = PHI_ARG_DEF (phi, 1);
1451 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1452 return false;
1454 if (gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1456 lhs = arg_1;
1457 header_phi = SSA_NAME_DEF_STMT (arg_0);
1458 stmt = SSA_NAME_DEF_STMT (arg_1);
1460 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1462 lhs = arg_0;
1463 header_phi = SSA_NAME_DEF_STMT (arg_1);
1464 stmt = SSA_NAME_DEF_STMT (arg_0);
1466 else
1467 return false;
1468 if (gimple_bb (header_phi) != loop->header)
1469 return false;
1471 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1472 return false;
1474 if (gimple_code (stmt) != GIMPLE_ASSIGN
1475 || gimple_has_volatile_ops (stmt))
1476 return false;
1478 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1479 return false;
1481 if (!is_predicated (gimple_bb (stmt)))
1482 return false;
1484 /* Check that stmt-block is predecessor of phi-block. */
1485 if (EDGE_PRED (bb, 0)->src != gimple_bb (stmt)
1486 && EDGE_PRED (bb, 1)->src != gimple_bb (stmt))
1487 return false;
1489 if (!has_single_use (lhs))
1490 return false;
1492 reduction_op = gimple_assign_rhs_code (stmt);
1493 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1494 return false;
1495 r_op1 = gimple_assign_rhs1 (stmt);
1496 r_op2 = gimple_assign_rhs2 (stmt);
1498 /* Make R_OP1 to hold reduction variable. */
1499 if (r_op2 == PHI_RESULT (header_phi)
1500 && reduction_op == PLUS_EXPR)
1502 tree tmp = r_op1;
1503 r_op1 = r_op2;
1504 r_op2 = tmp;
1506 else if (r_op1 != PHI_RESULT (header_phi))
1507 return false;
1509 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1510 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1512 gimple use_stmt = USE_STMT (use_p);
1513 if (is_gimple_debug (use_stmt))
1514 continue;
1515 if (use_stmt == stmt)
1516 continue;
1517 if (gimple_code (use_stmt) != GIMPLE_PHI)
1518 return false;
1521 *op0 = r_op1; *op1 = r_op2;
1522 *reduc = stmt;
1523 return true;
1526 /* Converts conditional scalar reduction into unconditional form, e.g.
1527 bb_4
1528 if (_5 != 0) goto bb_5 else goto bb_6
1529 end_bb_4
1530 bb_5
1531 res_6 = res_13 + 1;
1532 end_bb_5
1533 bb_6
1534 # res_2 = PHI <res_13(4), res_6(5)>
1535 end_bb_6
1537 will be converted into sequence
1538 _ifc__1 = _5 != 0 ? 1 : 0;
1539 res_2 = res_13 + _ifc__1;
1540 Argument SWAP tells that arguments of conditional expression should be
1541 swapped.
1542 Returns rhs of resulting PHI assignment. */
1544 static tree
1545 convert_scalar_cond_reduction (gimple reduc, gimple_stmt_iterator *gsi,
1546 tree cond, tree op0, tree op1, bool swap)
1548 gimple_stmt_iterator stmt_it;
1549 gimple new_assign;
1550 tree rhs;
1551 tree rhs1 = gimple_assign_rhs1 (reduc);
1552 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1553 tree c;
1554 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1556 if (dump_file && (dump_flags & TDF_DETAILS))
1558 fprintf (dump_file, "Found cond scalar reduction.\n");
1559 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1562 /* Build cond expression using COND and constant operand
1563 of reduction rhs. */
1564 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1565 unshare_expr (cond),
1566 swap ? zero : op1,
1567 swap ? op1 : zero);
1569 /* Create assignment stmt and insert it at GSI. */
1570 new_assign = gimple_build_assign (tmp, c);
1571 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1572 /* Build rhs for unconditional increment/decrement. */
1573 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1574 TREE_TYPE (rhs1), op0, tmp);
1576 /* Delete original reduction stmt. */
1577 stmt_it = gsi_for_stmt (reduc);
1578 gsi_remove (&stmt_it, true);
1579 release_defs (reduc);
1580 return rhs;
1583 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1584 This routine does not handle PHI nodes with more than two
1585 arguments.
1587 For example,
1588 S1: A = PHI <x1(1), x2(5)>
1589 is converted into,
1590 S2: A = cond ? x1 : x2;
1592 The generated code is inserted at GSI that points to the top of
1593 basic block's statement list. When COND is true, phi arg from
1594 TRUE_BB is selected. */
1596 static void
1597 predicate_scalar_phi (gphi *phi, tree cond,
1598 basic_block true_bb,
1599 gimple_stmt_iterator *gsi)
1601 gimple new_stmt;
1602 basic_block bb;
1603 tree rhs, res, arg, scev;
1605 gcc_assert (gimple_code (phi) == GIMPLE_PHI
1606 && gimple_phi_num_args (phi) == 2);
1608 res = gimple_phi_result (phi);
1609 /* Do not handle virtual phi nodes. */
1610 if (virtual_operand_p (res))
1611 return;
1613 bb = gimple_bb (phi);
1615 if ((arg = degenerate_phi_result (phi))
1616 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1617 res))
1618 && !chrec_contains_undetermined (scev)
1619 && scev != res
1620 && (arg = gimple_phi_arg_def (phi, 0))))
1621 rhs = arg;
1622 else
1624 tree arg_0, arg_1;
1625 tree op0, op1;
1626 gimple reduc;
1628 /* Use condition that is not TRUTH_NOT_EXPR in conditional modify expr. */
1629 if (EDGE_PRED (bb, 1)->src == true_bb)
1631 arg_0 = gimple_phi_arg_def (phi, 1);
1632 arg_1 = gimple_phi_arg_def (phi, 0);
1634 else
1636 arg_0 = gimple_phi_arg_def (phi, 0);
1637 arg_1 = gimple_phi_arg_def (phi, 1);
1639 if (is_cond_scalar_reduction (phi, &reduc, &op0, &op1))
1640 /* Convert reduction stmt into vectorizable form. */
1641 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1642 true_bb != gimple_bb (reduc));
1643 else
1644 /* Build new RHS using selected condition and arguments. */
1645 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1646 arg_0, arg_1);
1649 new_stmt = gimple_build_assign (res, rhs);
1650 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1651 update_stmt (new_stmt);
1653 if (dump_file && (dump_flags & TDF_DETAILS))
1655 fprintf (dump_file, "new phi replacement stmt\n");
1656 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1660 /* Replaces in LOOP all the scalar phi nodes other than those in the
1661 LOOP->header block with conditional modify expressions. */
1663 static void
1664 predicate_all_scalar_phis (struct loop *loop)
1666 basic_block bb;
1667 unsigned int orig_loop_num_nodes = loop->num_nodes;
1668 unsigned int i;
1670 for (i = 1; i < orig_loop_num_nodes; i++)
1672 gphi *phi;
1673 tree cond = NULL_TREE;
1674 gimple_stmt_iterator gsi;
1675 gphi_iterator phi_gsi;
1676 basic_block true_bb = NULL;
1677 bb = ifc_bbs[i];
1679 if (bb == loop->header)
1680 continue;
1682 phi_gsi = gsi_start_phis (bb);
1683 if (gsi_end_p (phi_gsi))
1684 continue;
1686 /* BB has two predecessors. Using predecessor's aux field, set
1687 appropriate condition for the PHI node replacement. */
1688 gsi = gsi_after_labels (bb);
1689 true_bb = find_phi_replacement_condition (bb, &cond, &gsi);
1691 while (!gsi_end_p (phi_gsi))
1693 phi = phi_gsi.phi ();
1694 predicate_scalar_phi (phi, cond, true_bb, &gsi);
1695 release_phi_node (phi);
1696 gsi_next (&phi_gsi);
1699 set_phi_nodes (bb, NULL);
1703 /* Insert in each basic block of LOOP the statements produced by the
1704 gimplification of the predicates. */
1706 static void
1707 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1709 unsigned int i;
1711 for (i = 0; i < loop->num_nodes; i++)
1713 basic_block bb = ifc_bbs[i];
1714 gimple_seq stmts;
1716 if (!is_predicated (bb))
1718 /* Do not insert statements for a basic block that is not
1719 predicated. Also make sure that the predicate of the
1720 basic block is set to true. */
1721 reset_bb_predicate (bb);
1722 continue;
1725 stmts = bb_predicate_gimplified_stmts (bb);
1726 if (stmts)
1728 if (flag_tree_loop_if_convert_stores
1729 || any_mask_load_store)
1731 /* Insert the predicate of the BB just after the label,
1732 as the if-conversion of memory writes will use this
1733 predicate. */
1734 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1735 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1737 else
1739 /* Insert the predicate of the BB at the end of the BB
1740 as this would reduce the register pressure: the only
1741 use of this predicate will be in successor BBs. */
1742 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1744 if (gsi_end_p (gsi)
1745 || stmt_ends_bb_p (gsi_stmt (gsi)))
1746 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1747 else
1748 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1751 /* Once the sequence is code generated, set it to NULL. */
1752 set_bb_predicate_gimplified_stmts (bb, NULL);
1757 /* Predicate each write to memory in LOOP.
1759 This function transforms control flow constructs containing memory
1760 writes of the form:
1762 | for (i = 0; i < N; i++)
1763 | if (cond)
1764 | A[i] = expr;
1766 into the following form that does not contain control flow:
1768 | for (i = 0; i < N; i++)
1769 | A[i] = cond ? expr : A[i];
1771 The original CFG looks like this:
1773 | bb_0
1774 | i = 0
1775 | end_bb_0
1777 | bb_1
1778 | if (i < N) goto bb_5 else goto bb_2
1779 | end_bb_1
1781 | bb_2
1782 | cond = some_computation;
1783 | if (cond) goto bb_3 else goto bb_4
1784 | end_bb_2
1786 | bb_3
1787 | A[i] = expr;
1788 | goto bb_4
1789 | end_bb_3
1791 | bb_4
1792 | goto bb_1
1793 | end_bb_4
1795 insert_gimplified_predicates inserts the computation of the COND
1796 expression at the beginning of the destination basic block:
1798 | bb_0
1799 | i = 0
1800 | end_bb_0
1802 | bb_1
1803 | if (i < N) goto bb_5 else goto bb_2
1804 | end_bb_1
1806 | bb_2
1807 | cond = some_computation;
1808 | if (cond) goto bb_3 else goto bb_4
1809 | end_bb_2
1811 | bb_3
1812 | cond = some_computation;
1813 | A[i] = expr;
1814 | goto bb_4
1815 | end_bb_3
1817 | bb_4
1818 | goto bb_1
1819 | end_bb_4
1821 predicate_mem_writes is then predicating the memory write as follows:
1823 | bb_0
1824 | i = 0
1825 | end_bb_0
1827 | bb_1
1828 | if (i < N) goto bb_5 else goto bb_2
1829 | end_bb_1
1831 | bb_2
1832 | if (cond) goto bb_3 else goto bb_4
1833 | end_bb_2
1835 | bb_3
1836 | cond = some_computation;
1837 | A[i] = cond ? expr : A[i];
1838 | goto bb_4
1839 | end_bb_3
1841 | bb_4
1842 | goto bb_1
1843 | end_bb_4
1845 and finally combine_blocks removes the basic block boundaries making
1846 the loop vectorizable:
1848 | bb_0
1849 | i = 0
1850 | if (i < N) goto bb_5 else goto bb_1
1851 | end_bb_0
1853 | bb_1
1854 | cond = some_computation;
1855 | A[i] = cond ? expr : A[i];
1856 | if (i < N) goto bb_5 else goto bb_4
1857 | end_bb_1
1859 | bb_4
1860 | goto bb_1
1861 | end_bb_4
1864 static void
1865 predicate_mem_writes (loop_p loop)
1867 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
1869 for (i = 1; i < orig_loop_num_nodes; i++)
1871 gimple_stmt_iterator gsi;
1872 basic_block bb = ifc_bbs[i];
1873 tree cond = bb_predicate (bb);
1874 bool swap;
1875 gimple stmt;
1877 if (is_true_predicate (cond))
1878 continue;
1880 swap = false;
1881 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1883 swap = true;
1884 cond = TREE_OPERAND (cond, 0);
1887 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1888 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
1889 continue;
1890 else if (gimple_plf (stmt, GF_PLF_2))
1892 tree lhs = gimple_assign_lhs (stmt);
1893 tree rhs = gimple_assign_rhs1 (stmt);
1894 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
1895 gimple new_stmt;
1896 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
1898 masktype = build_nonstandard_integer_type (bitsize, 1);
1899 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
1900 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
1901 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
1902 mark_addressable (ref);
1903 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
1904 true, NULL_TREE, true,
1905 GSI_SAME_STMT);
1906 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1907 is_gimple_condexpr, NULL_TREE,
1908 true, GSI_SAME_STMT);
1909 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
1910 mask_op0, mask_op1);
1911 mask = ifc_temp_var (masktype, mask, &gsi);
1912 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
1913 /* Copy points-to info if possible. */
1914 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
1915 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
1916 ref);
1917 if (TREE_CODE (lhs) == SSA_NAME)
1919 new_stmt
1920 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
1921 ptr, mask);
1922 gimple_call_set_lhs (new_stmt, lhs);
1924 else
1925 new_stmt
1926 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
1927 mask, rhs);
1928 gsi_replace (&gsi, new_stmt, true);
1930 else if (gimple_vdef (stmt))
1932 tree lhs = gimple_assign_lhs (stmt);
1933 tree rhs = gimple_assign_rhs1 (stmt);
1934 tree type = TREE_TYPE (lhs);
1936 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
1937 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
1938 if (swap)
1940 tree tem = lhs;
1941 lhs = rhs;
1942 rhs = tem;
1944 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
1945 is_gimple_condexpr, NULL_TREE,
1946 true, GSI_SAME_STMT);
1947 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
1948 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
1949 update_stmt (stmt);
1954 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
1955 other than the exit and latch of the LOOP. Also resets the
1956 GIMPLE_DEBUG information. */
1958 static void
1959 remove_conditions_and_labels (loop_p loop)
1961 gimple_stmt_iterator gsi;
1962 unsigned int i;
1964 for (i = 0; i < loop->num_nodes; i++)
1966 basic_block bb = ifc_bbs[i];
1968 if (bb_with_exit_edge_p (loop, bb)
1969 || bb == loop->latch)
1970 continue;
1972 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1973 switch (gimple_code (gsi_stmt (gsi)))
1975 case GIMPLE_COND:
1976 case GIMPLE_LABEL:
1977 gsi_remove (&gsi, true);
1978 break;
1980 case GIMPLE_DEBUG:
1981 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
1982 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1984 gimple_debug_bind_reset_value (gsi_stmt (gsi));
1985 update_stmt (gsi_stmt (gsi));
1987 gsi_next (&gsi);
1988 break;
1990 default:
1991 gsi_next (&gsi);
1996 /* Combine all the basic blocks from LOOP into one or two super basic
1997 blocks. Replace PHI nodes with conditional modify expressions. */
1999 static void
2000 combine_blocks (struct loop *loop, bool any_mask_load_store)
2002 basic_block bb, exit_bb, merge_target_bb;
2003 unsigned int orig_loop_num_nodes = loop->num_nodes;
2004 unsigned int i;
2005 edge e;
2006 edge_iterator ei;
2008 predicate_bbs (loop);
2009 remove_conditions_and_labels (loop);
2010 insert_gimplified_predicates (loop, any_mask_load_store);
2011 predicate_all_scalar_phis (loop);
2013 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2014 predicate_mem_writes (loop);
2016 /* Merge basic blocks: first remove all the edges in the loop,
2017 except for those from the exit block. */
2018 exit_bb = NULL;
2019 for (i = 0; i < orig_loop_num_nodes; i++)
2021 bb = ifc_bbs[i];
2022 free_bb_predicate (bb);
2023 if (bb_with_exit_edge_p (loop, bb))
2025 gcc_assert (exit_bb == NULL);
2026 exit_bb = bb;
2029 gcc_assert (exit_bb != loop->latch);
2031 for (i = 1; i < orig_loop_num_nodes; i++)
2033 bb = ifc_bbs[i];
2035 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2037 if (e->src == exit_bb)
2038 ei_next (&ei);
2039 else
2040 remove_edge (e);
2044 if (exit_bb != NULL)
2046 if (exit_bb != loop->header)
2048 /* Connect this node to loop header. */
2049 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2050 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2053 /* Redirect non-exit edges to loop->latch. */
2054 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2056 if (!loop_exit_edge_p (loop, e))
2057 redirect_edge_and_branch (e, loop->latch);
2059 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2061 else
2063 /* If the loop does not have an exit, reconnect header and latch. */
2064 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2065 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2068 merge_target_bb = loop->header;
2069 for (i = 1; i < orig_loop_num_nodes; i++)
2071 gimple_stmt_iterator gsi;
2072 gimple_stmt_iterator last;
2074 bb = ifc_bbs[i];
2076 if (bb == exit_bb || bb == loop->latch)
2077 continue;
2079 /* Make stmts member of loop->header. */
2080 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2081 gimple_set_bb (gsi_stmt (gsi), merge_target_bb);
2083 /* Update stmt list. */
2084 last = gsi_last_bb (merge_target_bb);
2085 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2086 set_bb_seq (bb, NULL);
2088 delete_basic_block (bb);
2091 /* If possible, merge loop header to the block with the exit edge.
2092 This reduces the number of basic blocks to two, to please the
2093 vectorizer that handles only loops with two nodes. */
2094 if (exit_bb
2095 && exit_bb != loop->header
2096 && can_merge_blocks_p (loop->header, exit_bb))
2097 merge_blocks (loop->header, exit_bb);
2099 free (ifc_bbs);
2100 ifc_bbs = NULL;
2103 /* Version LOOP before if-converting it, the original loop
2104 will be then if-converted, the new copy of the loop will not,
2105 and the LOOP_VECTORIZED internal call will be guarding which
2106 loop to execute. The vectorizer pass will fold this
2107 internal call into either true or false. */
2109 static bool
2110 version_loop_for_if_conversion (struct loop *loop)
2112 basic_block cond_bb;
2113 tree cond = make_ssa_name (boolean_type_node);
2114 struct loop *new_loop;
2115 gimple g;
2116 gimple_stmt_iterator gsi;
2118 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2119 build_int_cst (integer_type_node, loop->num),
2120 integer_zero_node);
2121 gimple_call_set_lhs (g, cond);
2123 initialize_original_copy_tables ();
2124 new_loop = loop_version (loop, cond, &cond_bb,
2125 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2126 REG_BR_PROB_BASE, true);
2127 free_original_copy_tables ();
2128 if (new_loop == NULL)
2129 return false;
2130 new_loop->dont_vectorize = true;
2131 new_loop->force_vectorize = false;
2132 gsi = gsi_last_bb (cond_bb);
2133 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2134 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2135 update_ssa (TODO_update_ssa);
2136 return true;
2139 /* If-convert LOOP when it is legal. For the moment this pass has no
2140 profitability analysis. Returns non-zero todo flags when something
2141 changed. */
2143 static unsigned int
2144 tree_if_conversion (struct loop *loop)
2146 unsigned int todo = 0;
2147 ifc_bbs = NULL;
2148 bool any_mask_load_store = false;
2150 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2151 || !dbg_cnt (if_conversion_tree))
2152 goto cleanup;
2154 if (any_mask_load_store
2155 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2156 || loop->dont_vectorize))
2157 goto cleanup;
2159 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2160 goto cleanup;
2162 /* Now all statements are if-convertible. Combine all the basic
2163 blocks into one huge basic block doing the if-conversion
2164 on-the-fly. */
2165 combine_blocks (loop, any_mask_load_store);
2167 todo |= TODO_cleanup_cfg;
2168 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2170 mark_virtual_operands_for_renaming (cfun);
2171 todo |= TODO_update_ssa_only_virtuals;
2174 cleanup:
2175 if (ifc_bbs)
2177 unsigned int i;
2179 for (i = 0; i < loop->num_nodes; i++)
2180 free_bb_predicate (ifc_bbs[i]);
2182 free (ifc_bbs);
2183 ifc_bbs = NULL;
2185 free_dominance_info (CDI_POST_DOMINATORS);
2187 return todo;
2190 /* Tree if-conversion pass management. */
2192 namespace {
2194 const pass_data pass_data_if_conversion =
2196 GIMPLE_PASS, /* type */
2197 "ifcvt", /* name */
2198 OPTGROUP_NONE, /* optinfo_flags */
2199 TV_NONE, /* tv_id */
2200 ( PROP_cfg | PROP_ssa ), /* properties_required */
2201 0, /* properties_provided */
2202 0, /* properties_destroyed */
2203 0, /* todo_flags_start */
2204 0, /* todo_flags_finish */
2207 class pass_if_conversion : public gimple_opt_pass
2209 public:
2210 pass_if_conversion (gcc::context *ctxt)
2211 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2214 /* opt_pass methods: */
2215 virtual bool gate (function *);
2216 virtual unsigned int execute (function *);
2218 }; // class pass_if_conversion
2220 bool
2221 pass_if_conversion::gate (function *fun)
2223 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2224 && flag_tree_loop_if_convert != 0)
2225 || flag_tree_loop_if_convert == 1
2226 || flag_tree_loop_if_convert_stores == 1);
2229 unsigned int
2230 pass_if_conversion::execute (function *fun)
2232 struct loop *loop;
2233 unsigned todo = 0;
2235 if (number_of_loops (fun) <= 1)
2236 return 0;
2238 FOR_EACH_LOOP (loop, 0)
2239 if (flag_tree_loop_if_convert == 1
2240 || flag_tree_loop_if_convert_stores == 1
2241 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2242 && !loop->dont_vectorize))
2243 todo |= tree_if_conversion (loop);
2245 #ifdef ENABLE_CHECKING
2247 basic_block bb;
2248 FOR_EACH_BB_FN (bb, fun)
2249 gcc_assert (!bb->aux);
2251 #endif
2253 return todo;
2256 } // anon namespace
2258 gimple_opt_pass *
2259 make_pass_if_conversion (gcc::context *ctxt)
2261 return new pass_if_conversion (ctxt);