* g++.dg/cpp/ucn-1.C: Fix typo.
[official-gcc.git] / gcc / tree-if-conv.c
blob88b6405a7be355242a306f1d7768cba49ed7da31
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop-ivopts.h"
110 #include "tree-ssa-address.h"
111 #include "dbgcnt.h"
112 #include "tree-hash-traits.h"
114 /* List of basic blocks in if-conversion-suitable order. */
115 static basic_block *ifc_bbs;
117 /* Apply more aggressive (extended) if-conversion if true. */
118 static bool aggressive_if_conv;
120 /* Structure used to predicate basic blocks. This is attached to the
121 ->aux field of the BBs in the loop to be if-converted. */
122 struct bb_predicate {
124 /* The condition under which this basic block is executed. */
125 tree predicate;
127 /* PREDICATE is gimplified, and the sequence of statements is
128 recorded here, in order to avoid the duplication of computations
129 that occur in previous conditions. See PR44483. */
130 gimple_seq predicate_gimplified_stmts;
133 /* Returns true when the basic block BB has a predicate. */
135 static inline bool
136 bb_has_predicate (basic_block bb)
138 return bb->aux != NULL;
141 /* Returns the gimplified predicate for basic block BB. */
143 static inline tree
144 bb_predicate (basic_block bb)
146 return ((struct bb_predicate *) bb->aux)->predicate;
149 /* Sets the gimplified predicate COND for basic block BB. */
151 static inline void
152 set_bb_predicate (basic_block bb, tree cond)
154 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
155 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
156 || is_gimple_condexpr (cond));
157 ((struct bb_predicate *) bb->aux)->predicate = cond;
160 /* Returns the sequence of statements of the gimplification of the
161 predicate for basic block BB. */
163 static inline gimple_seq
164 bb_predicate_gimplified_stmts (basic_block bb)
166 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
169 /* Sets the sequence of statements STMTS of the gimplification of the
170 predicate for basic block BB. */
172 static inline void
173 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
175 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
178 /* Adds the sequence of statements STMTS to the sequence of statements
179 of the predicate for basic block BB. */
181 static inline void
182 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
184 gimple_seq_add_seq
185 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
188 /* Initializes to TRUE the predicate of basic block BB. */
190 static inline void
191 init_bb_predicate (basic_block bb)
193 bb->aux = XNEW (struct bb_predicate);
194 set_bb_predicate_gimplified_stmts (bb, NULL);
195 set_bb_predicate (bb, boolean_true_node);
198 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
199 but don't actually free it. */
201 static inline void
202 release_bb_predicate (basic_block bb)
204 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
205 if (stmts)
207 gimple_stmt_iterator i;
209 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
210 free_stmt_operands (cfun, gsi_stmt (i));
211 set_bb_predicate_gimplified_stmts (bb, NULL);
215 /* Free the predicate of basic block BB. */
217 static inline void
218 free_bb_predicate (basic_block bb)
220 if (!bb_has_predicate (bb))
221 return;
223 release_bb_predicate (bb);
224 free (bb->aux);
225 bb->aux = NULL;
228 /* Reinitialize predicate of BB with the true predicate. */
230 static inline void
231 reset_bb_predicate (basic_block bb)
233 if (!bb_has_predicate (bb))
234 init_bb_predicate (bb);
235 else
237 release_bb_predicate (bb);
238 set_bb_predicate (bb, boolean_true_node);
242 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
243 the expression EXPR. Inserts the statement created for this
244 computation before GSI and leaves the iterator GSI at the same
245 statement. */
247 static tree
248 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
250 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
251 gimple *stmt = gimple_build_assign (new_name, expr);
252 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
253 return new_name;
256 /* Return true when COND is a true predicate. */
258 static inline bool
259 is_true_predicate (tree cond)
261 return (cond == NULL_TREE
262 || cond == boolean_true_node
263 || integer_onep (cond));
266 /* Returns true when BB has a predicate that is not trivial: true or
267 NULL_TREE. */
269 static inline bool
270 is_predicated (basic_block bb)
272 return !is_true_predicate (bb_predicate (bb));
275 /* Parses the predicate COND and returns its comparison code and
276 operands OP0 and OP1. */
278 static enum tree_code
279 parse_predicate (tree cond, tree *op0, tree *op1)
281 gimple *s;
283 if (TREE_CODE (cond) == SSA_NAME
284 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
286 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
288 *op0 = gimple_assign_rhs1 (s);
289 *op1 = gimple_assign_rhs2 (s);
290 return gimple_assign_rhs_code (s);
293 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
295 tree op = gimple_assign_rhs1 (s);
296 tree type = TREE_TYPE (op);
297 enum tree_code code = parse_predicate (op, op0, op1);
299 return code == ERROR_MARK ? ERROR_MARK
300 : invert_tree_comparison (code, HONOR_NANS (type));
303 return ERROR_MARK;
306 if (COMPARISON_CLASS_P (cond))
308 *op0 = TREE_OPERAND (cond, 0);
309 *op1 = TREE_OPERAND (cond, 1);
310 return TREE_CODE (cond);
313 return ERROR_MARK;
316 /* Returns the fold of predicate C1 OR C2 at location LOC. */
318 static tree
319 fold_or_predicates (location_t loc, tree c1, tree c2)
321 tree op1a, op1b, op2a, op2b;
322 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
323 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
325 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
327 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
328 code2, op2a, op2b);
329 if (t)
330 return t;
333 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
336 /* Returns true if N is either a constant or a SSA_NAME. */
338 static bool
339 constant_or_ssa_name (tree n)
341 switch (TREE_CODE (n))
343 case SSA_NAME:
344 case INTEGER_CST:
345 case REAL_CST:
346 case COMPLEX_CST:
347 case VECTOR_CST:
348 return true;
349 default:
350 return false;
354 /* Returns either a COND_EXPR or the folded expression if the folded
355 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
356 a constant or a SSA_NAME. */
358 static tree
359 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
361 tree rhs1, lhs1, cond_expr;
363 /* If COND is comparison r != 0 and r has boolean type, convert COND
364 to SSA_NAME to accept by vect bool pattern. */
365 if (TREE_CODE (cond) == NE_EXPR)
367 tree op0 = TREE_OPERAND (cond, 0);
368 tree op1 = TREE_OPERAND (cond, 1);
369 if (TREE_CODE (op0) == SSA_NAME
370 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
371 && (integer_zerop (op1)))
372 cond = op0;
374 cond_expr = fold_ternary (COND_EXPR, type, cond,
375 rhs, lhs);
377 if (cond_expr == NULL_TREE)
378 return build3 (COND_EXPR, type, cond, rhs, lhs);
380 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
382 if (constant_or_ssa_name (cond_expr))
383 return cond_expr;
385 if (TREE_CODE (cond_expr) == ABS_EXPR)
387 rhs1 = TREE_OPERAND (cond_expr, 1);
388 STRIP_USELESS_TYPE_CONVERSION (rhs1);
389 if (constant_or_ssa_name (rhs1))
390 return build1 (ABS_EXPR, type, rhs1);
393 if (TREE_CODE (cond_expr) == MIN_EXPR
394 || TREE_CODE (cond_expr) == MAX_EXPR)
396 lhs1 = TREE_OPERAND (cond_expr, 0);
397 STRIP_USELESS_TYPE_CONVERSION (lhs1);
398 rhs1 = TREE_OPERAND (cond_expr, 1);
399 STRIP_USELESS_TYPE_CONVERSION (rhs1);
400 if (constant_or_ssa_name (rhs1)
401 && constant_or_ssa_name (lhs1))
402 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
404 return build3 (COND_EXPR, type, cond, rhs, lhs);
407 /* Add condition NC to the predicate list of basic block BB. LOOP is
408 the loop to be if-converted. Use predicate of cd-equivalent block
409 for join bb if it exists: we call basic blocks bb1 and bb2
410 cd-equivalent if they are executed under the same condition. */
412 static inline void
413 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
415 tree bc, *tp;
416 basic_block dom_bb;
418 if (is_true_predicate (nc))
419 return;
421 /* If dominance tells us this basic block is always executed,
422 don't record any predicates for it. */
423 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
424 return;
426 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
427 /* We use notion of cd equivalence to get simpler predicate for
428 join block, e.g. if join block has 2 predecessors with predicates
429 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
430 p1 & p2 | p1 & !p2. */
431 if (dom_bb != loop->header
432 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
434 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
435 bc = bb_predicate (dom_bb);
436 if (!is_true_predicate (bc))
437 set_bb_predicate (bb, bc);
438 else
439 gcc_assert (is_true_predicate (bb_predicate (bb)));
440 if (dump_file && (dump_flags & TDF_DETAILS))
441 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
442 dom_bb->index, bb->index);
443 return;
446 if (!is_predicated (bb))
447 bc = nc;
448 else
450 bc = bb_predicate (bb);
451 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
452 if (is_true_predicate (bc))
454 reset_bb_predicate (bb);
455 return;
459 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
460 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
461 tp = &TREE_OPERAND (bc, 0);
462 else
463 tp = &bc;
464 if (!is_gimple_condexpr (*tp))
466 gimple_seq stmts;
467 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
468 add_bb_predicate_gimplified_stmts (bb, stmts);
470 set_bb_predicate (bb, bc);
473 /* Add the condition COND to the previous condition PREV_COND, and add
474 this to the predicate list of the destination of edge E. LOOP is
475 the loop to be if-converted. */
477 static void
478 add_to_dst_predicate_list (struct loop *loop, edge e,
479 tree prev_cond, tree cond)
481 if (!flow_bb_inside_loop_p (loop, e->dest))
482 return;
484 if (!is_true_predicate (prev_cond))
485 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
486 prev_cond, cond);
488 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
489 add_to_predicate_list (loop, e->dest, cond);
492 /* Return true if one of the successor edges of BB exits LOOP. */
494 static bool
495 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
497 edge e;
498 edge_iterator ei;
500 FOR_EACH_EDGE (e, ei, bb->succs)
501 if (loop_exit_edge_p (loop, e))
502 return true;
504 return false;
507 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
508 and it belongs to basic block BB.
510 PHI is not if-convertible if:
511 - it has more than 2 arguments.
513 When the flag_tree_loop_if_convert_stores is not set, PHI is not
514 if-convertible if:
515 - a virtual PHI is immediately used in another PHI node,
516 - there is a virtual PHI in a BB other than the loop->header.
517 When the aggressive_if_conv is set, PHI can have more than
518 two arguments. */
520 static bool
521 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
522 bool any_mask_load_store)
524 if (dump_file && (dump_flags & TDF_DETAILS))
526 fprintf (dump_file, "-------------------------\n");
527 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
530 if (bb != loop->header)
532 if (gimple_phi_num_args (phi) != 2
533 && !aggressive_if_conv)
535 if (dump_file && (dump_flags & TDF_DETAILS))
536 fprintf (dump_file, "More than two phi node args.\n");
537 return false;
541 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
542 return true;
544 /* When the flag_tree_loop_if_convert_stores is not set, check
545 that there are no memory writes in the branches of the loop to be
546 if-converted. */
547 if (virtual_operand_p (gimple_phi_result (phi)))
549 imm_use_iterator imm_iter;
550 use_operand_p use_p;
552 if (bb != loop->header)
554 if (dump_file && (dump_flags & TDF_DETAILS))
555 fprintf (dump_file, "Virtual phi not on loop->header.\n");
556 return false;
559 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
561 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
562 && USE_STMT (use_p) != phi)
564 if (dump_file && (dump_flags & TDF_DETAILS))
565 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
566 return false;
571 return true;
574 /* Records the status of a data reference. This struct is attached to
575 each DR->aux field. */
577 struct ifc_dr {
578 /* -1 when not initialized, 0 when false, 1 when true. */
579 int written_at_least_once;
581 /* -1 when not initialized, 0 when false, 1 when true. */
582 int rw_unconditionally;
585 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
586 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
587 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
589 /* Returns true when the memory references of STMT are read or written
590 unconditionally. In other words, this function returns true when
591 for every data reference A in STMT there exist other accesses to
592 a data reference with the same base with predicates that add up (OR-up) to
593 the true predicate: this ensures that the data reference A is touched
594 (read or written) on every iteration of the if-converted loop. */
596 static bool
597 memrefs_read_or_written_unconditionally (gimple *stmt,
598 vec<data_reference_p> drs)
600 int i, j;
601 data_reference_p a, b;
602 tree ca = bb_predicate (gimple_bb (stmt));
604 for (i = 0; drs.iterate (i, &a); i++)
605 if (DR_STMT (a) == stmt)
607 bool found = false;
608 int x = DR_RW_UNCONDITIONALLY (a);
610 if (x == 0)
611 return false;
613 if (x == 1)
614 continue;
616 for (j = 0; drs.iterate (j, &b); j++)
618 tree ref_base_a = DR_REF (a);
619 tree ref_base_b = DR_REF (b);
621 if (DR_STMT (b) == stmt)
622 continue;
624 while (TREE_CODE (ref_base_a) == COMPONENT_REF
625 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
626 || TREE_CODE (ref_base_a) == REALPART_EXPR)
627 ref_base_a = TREE_OPERAND (ref_base_a, 0);
629 while (TREE_CODE (ref_base_b) == COMPONENT_REF
630 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
631 || TREE_CODE (ref_base_b) == REALPART_EXPR)
632 ref_base_b = TREE_OPERAND (ref_base_b, 0);
634 if (operand_equal_p (ref_base_a, ref_base_b, 0))
636 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
638 if (DR_RW_UNCONDITIONALLY (b) == 1
639 || is_true_predicate (cb)
640 || is_true_predicate (ca
641 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
643 DR_RW_UNCONDITIONALLY (a) = 1;
644 DR_RW_UNCONDITIONALLY (b) = 1;
645 found = true;
646 break;
651 if (!found)
653 DR_RW_UNCONDITIONALLY (a) = 0;
654 return false;
658 return true;
661 /* Returns true when the memory references of STMT are unconditionally
662 written. In other words, this function returns true when for every
663 data reference A written in STMT, there exist other writes to the
664 same data reference with predicates that add up (OR-up) to the true
665 predicate: this ensures that the data reference A is written on
666 every iteration of the if-converted loop. */
668 static bool
669 write_memrefs_written_at_least_once (gimple *stmt,
670 vec<data_reference_p> drs)
672 int i, j;
673 data_reference_p a, b;
674 tree ca = bb_predicate (gimple_bb (stmt));
676 for (i = 0; drs.iterate (i, &a); i++)
677 if (DR_STMT (a) == stmt
678 && DR_IS_WRITE (a))
680 bool found = false;
681 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
683 if (x == 0)
684 return false;
686 if (x == 1)
687 continue;
689 for (j = 0; drs.iterate (j, &b); j++)
690 if (DR_STMT (b) != stmt
691 && DR_IS_WRITE (b)
692 && same_data_refs_base_objects (a, b))
694 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
696 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
697 || is_true_predicate (cb)
698 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
699 ca, cb)))
701 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
702 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
703 found = true;
704 break;
708 if (!found)
710 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
711 return false;
715 return true;
718 /* Return true when the memory references of STMT won't trap in the
719 if-converted code. There are two things that we have to check for:
721 - writes to memory occur to writable memory: if-conversion of
722 memory writes transforms the conditional memory writes into
723 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
724 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
725 be executed at all in the original code, it may be a readonly
726 memory. To check that A is not const-qualified, we check that
727 there exists at least an unconditional write to A in the current
728 function.
730 - reads or writes to memory are valid memory accesses for every
731 iteration. To check that the memory accesses are correctly formed
732 and that we are allowed to read and write in these locations, we
733 check that the memory accesses to be if-converted occur at every
734 iteration unconditionally. */
736 static bool
737 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> refs)
739 return write_memrefs_written_at_least_once (stmt, refs)
740 && memrefs_read_or_written_unconditionally (stmt, refs);
743 /* Wrapper around gimple_could_trap_p refined for the needs of the
744 if-conversion. Try to prove that the memory accesses of STMT could
745 not trap in the innermost loop containing STMT. */
747 static bool
748 ifcvt_could_trap_p (gimple *stmt, vec<data_reference_p> refs)
750 if (gimple_vuse (stmt)
751 && !gimple_could_trap_p_1 (stmt, false, false)
752 && ifcvt_memrefs_wont_trap (stmt, refs))
753 return false;
755 return gimple_could_trap_p (stmt);
758 /* Return true if STMT could be converted into a masked load or store
759 (conditional load or store based on a mask computed from bb predicate). */
761 static bool
762 ifcvt_can_use_mask_load_store (gimple *stmt)
764 tree lhs, ref;
765 machine_mode mode;
766 basic_block bb = gimple_bb (stmt);
767 bool is_load;
769 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
770 || bb->loop_father->dont_vectorize
771 || !gimple_assign_single_p (stmt)
772 || gimple_has_volatile_ops (stmt))
773 return false;
775 /* Check whether this is a load or store. */
776 lhs = gimple_assign_lhs (stmt);
777 if (gimple_store_p (stmt))
779 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
780 return false;
781 is_load = false;
782 ref = lhs;
784 else if (gimple_assign_load_p (stmt))
786 is_load = true;
787 ref = gimple_assign_rhs1 (stmt);
789 else
790 return false;
792 if (may_be_nonaddressable_p (ref))
793 return false;
795 /* Mask should be integer mode of the same size as the load/store
796 mode. */
797 mode = TYPE_MODE (TREE_TYPE (lhs));
798 if (int_mode_for_mode (mode) == BLKmode
799 || VECTOR_MODE_P (mode))
800 return false;
802 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
803 return true;
805 return false;
808 /* Return true when STMT is if-convertible.
810 GIMPLE_ASSIGN statement is not if-convertible if,
811 - it is not movable,
812 - it could trap,
813 - LHS is not var decl. */
815 static bool
816 if_convertible_gimple_assign_stmt_p (gimple *stmt,
817 vec<data_reference_p> refs,
818 bool *any_mask_load_store)
820 tree lhs = gimple_assign_lhs (stmt);
821 basic_block bb;
823 if (dump_file && (dump_flags & TDF_DETAILS))
825 fprintf (dump_file, "-------------------------\n");
826 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
829 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
830 return false;
832 /* Some of these constrains might be too conservative. */
833 if (stmt_ends_bb_p (stmt)
834 || gimple_has_volatile_ops (stmt)
835 || (TREE_CODE (lhs) == SSA_NAME
836 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
837 || gimple_has_side_effects (stmt))
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "stmt not suitable for ifcvt\n");
841 return false;
844 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
845 in between if_convertible_loop_p and combine_blocks
846 we can perform loop versioning. */
847 gimple_set_plf (stmt, GF_PLF_2, false);
849 if (flag_tree_loop_if_convert_stores)
851 if (ifcvt_could_trap_p (stmt, refs))
853 if (ifcvt_can_use_mask_load_store (stmt))
855 gimple_set_plf (stmt, GF_PLF_2, true);
856 *any_mask_load_store = true;
857 return true;
859 if (dump_file && (dump_flags & TDF_DETAILS))
860 fprintf (dump_file, "tree could trap...\n");
861 return false;
863 return true;
866 if (ifcvt_could_trap_p (stmt, refs))
868 if (ifcvt_can_use_mask_load_store (stmt))
870 gimple_set_plf (stmt, GF_PLF_2, true);
871 *any_mask_load_store = true;
872 return true;
874 if (dump_file && (dump_flags & TDF_DETAILS))
875 fprintf (dump_file, "tree could trap...\n");
876 return false;
879 bb = gimple_bb (stmt);
881 if (TREE_CODE (lhs) != SSA_NAME
882 && bb != bb->loop_father->header
883 && !bb_with_exit_edge_p (bb->loop_father, bb))
885 if (ifcvt_can_use_mask_load_store (stmt))
887 gimple_set_plf (stmt, GF_PLF_2, true);
888 *any_mask_load_store = true;
889 return true;
891 if (dump_file && (dump_flags & TDF_DETAILS))
893 fprintf (dump_file, "LHS is not var\n");
894 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
896 return false;
899 return true;
902 /* Return true when STMT is if-convertible.
904 A statement is if-convertible if:
905 - it is an if-convertible GIMPLE_ASSIGN,
906 - it is a GIMPLE_LABEL or a GIMPLE_COND,
907 - it is builtins call. */
909 static bool
910 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs,
911 bool *any_mask_load_store)
913 switch (gimple_code (stmt))
915 case GIMPLE_LABEL:
916 case GIMPLE_DEBUG:
917 case GIMPLE_COND:
918 return true;
920 case GIMPLE_ASSIGN:
921 return if_convertible_gimple_assign_stmt_p (stmt, refs,
922 any_mask_load_store);
924 case GIMPLE_CALL:
926 tree fndecl = gimple_call_fndecl (stmt);
927 if (fndecl)
929 int flags = gimple_call_flags (stmt);
930 if ((flags & ECF_CONST)
931 && !(flags & ECF_LOOPING_CONST_OR_PURE)
932 /* We can only vectorize some builtins at the moment,
933 so restrict if-conversion to those. */
934 && DECL_BUILT_IN (fndecl))
935 return true;
937 return false;
940 default:
941 /* Don't know what to do with 'em so don't do anything. */
942 if (dump_file && (dump_flags & TDF_DETAILS))
944 fprintf (dump_file, "don't know what to do\n");
945 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
947 return false;
948 break;
951 return true;
954 /* Assumes that BB has more than 1 predecessors.
955 Returns false if at least one successor is not on critical edge
956 and true otherwise. */
958 static inline bool
959 all_preds_critical_p (basic_block bb)
961 edge e;
962 edge_iterator ei;
964 FOR_EACH_EDGE (e, ei, bb->preds)
965 if (EDGE_COUNT (e->src->succs) == 1)
966 return false;
967 return true;
970 /* Returns true if at least one successor in on critical edge. */
971 static inline bool
972 has_pred_critical_p (basic_block bb)
974 edge e;
975 edge_iterator ei;
977 FOR_EACH_EDGE (e, ei, bb->preds)
978 if (EDGE_COUNT (e->src->succs) > 1)
979 return true;
980 return false;
983 /* Return true when BB is if-convertible. This routine does not check
984 basic block's statements and phis.
986 A basic block is not if-convertible if:
987 - it is non-empty and it is after the exit block (in BFS order),
988 - it is after the exit block but before the latch,
989 - its edges are not normal.
991 Last restriction is valid if aggressive_if_conv is false.
993 EXIT_BB is the basic block containing the exit of the LOOP. BB is
994 inside LOOP. */
996 static bool
997 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
999 edge e;
1000 edge_iterator ei;
1002 if (dump_file && (dump_flags & TDF_DETAILS))
1003 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1005 if (EDGE_COUNT (bb->succs) > 2)
1006 return false;
1008 if (EDGE_COUNT (bb->preds) > 2
1009 && !aggressive_if_conv)
1010 return false;
1012 if (exit_bb)
1014 if (bb != loop->latch)
1016 if (dump_file && (dump_flags & TDF_DETAILS))
1017 fprintf (dump_file, "basic block after exit bb but before latch\n");
1018 return false;
1020 else if (!empty_block_p (bb))
1022 if (dump_file && (dump_flags & TDF_DETAILS))
1023 fprintf (dump_file, "non empty basic block after exit bb\n");
1024 return false;
1026 else if (bb == loop->latch
1027 && bb != exit_bb
1028 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1030 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "latch is not dominated by exit_block\n");
1032 return false;
1036 /* Be less adventurous and handle only normal edges. */
1037 FOR_EACH_EDGE (e, ei, bb->succs)
1038 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1040 if (dump_file && (dump_flags & TDF_DETAILS))
1041 fprintf (dump_file, "Difficult to handle edges\n");
1042 return false;
1045 /* At least one incoming edge has to be non-critical as otherwise edge
1046 predicates are not equal to basic-block predicates of the edge
1047 source. This check is skipped if aggressive_if_conv is true. */
1048 if (!aggressive_if_conv
1049 && EDGE_COUNT (bb->preds) > 1
1050 && bb != loop->header
1051 && all_preds_critical_p (bb))
1053 if (dump_file && (dump_flags & TDF_DETAILS))
1054 fprintf (dump_file, "only critical predecessors\n");
1055 return false;
1058 return true;
1061 /* Return true when all predecessor blocks of BB are visited. The
1062 VISITED bitmap keeps track of the visited blocks. */
1064 static bool
1065 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1067 edge e;
1068 edge_iterator ei;
1069 FOR_EACH_EDGE (e, ei, bb->preds)
1070 if (!bitmap_bit_p (*visited, e->src->index))
1071 return false;
1073 return true;
1076 /* Get body of a LOOP in suitable order for if-conversion. It is
1077 caller's responsibility to deallocate basic block list.
1078 If-conversion suitable order is, breadth first sort (BFS) order
1079 with an additional constraint: select a block only if all its
1080 predecessors are already selected. */
1082 static basic_block *
1083 get_loop_body_in_if_conv_order (const struct loop *loop)
1085 basic_block *blocks, *blocks_in_bfs_order;
1086 basic_block bb;
1087 bitmap visited;
1088 unsigned int index = 0;
1089 unsigned int visited_count = 0;
1091 gcc_assert (loop->num_nodes);
1092 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1094 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1095 visited = BITMAP_ALLOC (NULL);
1097 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1099 index = 0;
1100 while (index < loop->num_nodes)
1102 bb = blocks_in_bfs_order [index];
1104 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1106 free (blocks_in_bfs_order);
1107 BITMAP_FREE (visited);
1108 free (blocks);
1109 return NULL;
1112 if (!bitmap_bit_p (visited, bb->index))
1114 if (pred_blocks_visited_p (bb, &visited)
1115 || bb == loop->header)
1117 /* This block is now visited. */
1118 bitmap_set_bit (visited, bb->index);
1119 blocks[visited_count++] = bb;
1123 index++;
1125 if (index == loop->num_nodes
1126 && visited_count != loop->num_nodes)
1127 /* Not done yet. */
1128 index = 0;
1130 free (blocks_in_bfs_order);
1131 BITMAP_FREE (visited);
1132 return blocks;
1135 /* Returns true when the analysis of the predicates for all the basic
1136 blocks in LOOP succeeded.
1138 predicate_bbs first allocates the predicates of the basic blocks.
1139 These fields are then initialized with the tree expressions
1140 representing the predicates under which a basic block is executed
1141 in the LOOP. As the loop->header is executed at each iteration, it
1142 has the "true" predicate. Other statements executed under a
1143 condition are predicated with that condition, for example
1145 | if (x)
1146 | S1;
1147 | else
1148 | S2;
1150 S1 will be predicated with "x", and
1151 S2 will be predicated with "!x". */
1153 static void
1154 predicate_bbs (loop_p loop)
1156 unsigned int i;
1158 for (i = 0; i < loop->num_nodes; i++)
1159 init_bb_predicate (ifc_bbs[i]);
1161 for (i = 0; i < loop->num_nodes; i++)
1163 basic_block bb = ifc_bbs[i];
1164 tree cond;
1165 gimple *stmt;
1167 /* The loop latch and loop exit block are always executed and
1168 have no extra conditions to be processed: skip them. */
1169 if (bb == loop->latch
1170 || bb_with_exit_edge_p (loop, bb))
1172 reset_bb_predicate (bb);
1173 continue;
1176 cond = bb_predicate (bb);
1177 stmt = last_stmt (bb);
1178 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1180 tree c2;
1181 edge true_edge, false_edge;
1182 location_t loc = gimple_location (stmt);
1183 tree c = build2_loc (loc, gimple_cond_code (stmt),
1184 boolean_type_node,
1185 gimple_cond_lhs (stmt),
1186 gimple_cond_rhs (stmt));
1188 /* Add new condition into destination's predicate list. */
1189 extract_true_false_edges_from_block (gimple_bb (stmt),
1190 &true_edge, &false_edge);
1192 /* If C is true, then TRUE_EDGE is taken. */
1193 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1194 unshare_expr (c));
1196 /* If C is false, then FALSE_EDGE is taken. */
1197 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1198 unshare_expr (c));
1199 add_to_dst_predicate_list (loop, false_edge,
1200 unshare_expr (cond), c2);
1202 cond = NULL_TREE;
1205 /* If current bb has only one successor, then consider it as an
1206 unconditional goto. */
1207 if (single_succ_p (bb))
1209 basic_block bb_n = single_succ (bb);
1211 /* The successor bb inherits the predicate of its
1212 predecessor. If there is no predicate in the predecessor
1213 bb, then consider the successor bb as always executed. */
1214 if (cond == NULL_TREE)
1215 cond = boolean_true_node;
1217 add_to_predicate_list (loop, bb_n, cond);
1221 /* The loop header is always executed. */
1222 reset_bb_predicate (loop->header);
1223 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1224 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1227 /* Return true when LOOP is if-convertible. This is a helper function
1228 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1229 in if_convertible_loop_p. */
1231 static bool
1232 if_convertible_loop_p_1 (struct loop *loop,
1233 vec<loop_p> *loop_nest,
1234 vec<data_reference_p> *refs,
1235 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1237 bool res;
1238 unsigned int i;
1239 basic_block exit_bb = NULL;
1241 /* Don't if-convert the loop when the data dependences cannot be
1242 computed: the loop won't be vectorized in that case. */
1243 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1244 if (!res)
1245 return false;
1247 calculate_dominance_info (CDI_DOMINATORS);
1248 calculate_dominance_info (CDI_POST_DOMINATORS);
1250 /* Allow statements that can be handled during if-conversion. */
1251 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1252 if (!ifc_bbs)
1254 if (dump_file && (dump_flags & TDF_DETAILS))
1255 fprintf (dump_file, "Irreducible loop\n");
1256 return false;
1259 for (i = 0; i < loop->num_nodes; i++)
1261 basic_block bb = ifc_bbs[i];
1263 if (!if_convertible_bb_p (loop, bb, exit_bb))
1264 return false;
1266 if (bb_with_exit_edge_p (loop, bb))
1267 exit_bb = bb;
1270 for (i = 0; i < loop->num_nodes; i++)
1272 basic_block bb = ifc_bbs[i];
1273 gimple_stmt_iterator gsi;
1275 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1276 switch (gimple_code (gsi_stmt (gsi)))
1278 case GIMPLE_LABEL:
1279 case GIMPLE_ASSIGN:
1280 case GIMPLE_CALL:
1281 case GIMPLE_DEBUG:
1282 case GIMPLE_COND:
1283 break;
1284 default:
1285 return false;
1289 data_reference_p dr;
1291 for (i = 0; refs->iterate (i, &dr); i++)
1293 dr->aux = XNEW (struct ifc_dr);
1294 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1295 DR_RW_UNCONDITIONALLY (dr) = -1;
1297 predicate_bbs (loop);
1299 for (i = 0; i < loop->num_nodes; i++)
1301 basic_block bb = ifc_bbs[i];
1302 gimple_stmt_iterator itr;
1304 /* Check the if-convertibility of statements in predicated BBs. */
1305 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1306 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1307 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1308 any_mask_load_store))
1309 return false;
1312 for (i = 0; i < loop->num_nodes; i++)
1313 free_bb_predicate (ifc_bbs[i]);
1315 /* Checking PHIs needs to be done after stmts, as the fact whether there
1316 are any masked loads or stores affects the tests. */
1317 for (i = 0; i < loop->num_nodes; i++)
1319 basic_block bb = ifc_bbs[i];
1320 gphi_iterator itr;
1322 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1323 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1324 *any_mask_load_store))
1325 return false;
1328 if (dump_file)
1329 fprintf (dump_file, "Applying if-conversion\n");
1331 return true;
1334 /* Return true when LOOP is if-convertible.
1335 LOOP is if-convertible if:
1336 - it is innermost,
1337 - it has two or more basic blocks,
1338 - it has only one exit,
1339 - loop header is not the exit edge,
1340 - if its basic blocks and phi nodes are if convertible. */
1342 static bool
1343 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1345 edge e;
1346 edge_iterator ei;
1347 bool res = false;
1348 vec<data_reference_p> refs;
1349 vec<ddr_p> ddrs;
1351 /* Handle only innermost loop. */
1352 if (!loop || loop->inner)
1354 if (dump_file && (dump_flags & TDF_DETAILS))
1355 fprintf (dump_file, "not innermost loop\n");
1356 return false;
1359 /* If only one block, no need for if-conversion. */
1360 if (loop->num_nodes <= 2)
1362 if (dump_file && (dump_flags & TDF_DETAILS))
1363 fprintf (dump_file, "less than 2 basic blocks\n");
1364 return false;
1367 /* More than one loop exit is too much to handle. */
1368 if (!single_exit (loop))
1370 if (dump_file && (dump_flags & TDF_DETAILS))
1371 fprintf (dump_file, "multiple exits\n");
1372 return false;
1375 /* If one of the loop header's edge is an exit edge then do not
1376 apply if-conversion. */
1377 FOR_EACH_EDGE (e, ei, loop->header->succs)
1378 if (loop_exit_edge_p (loop, e))
1379 return false;
1381 refs.create (5);
1382 ddrs.create (25);
1383 auto_vec<loop_p, 3> loop_nest;
1384 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1385 any_mask_load_store);
1387 data_reference_p dr;
1388 unsigned int i;
1389 for (i = 0; refs.iterate (i, &dr); i++)
1390 free (dr->aux);
1392 free_data_refs (refs);
1393 free_dependence_relations (ddrs);
1394 return res;
1397 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1398 which is in predicated basic block.
1399 In fact, the following PHI pattern is searching:
1400 loop-header:
1401 reduc_1 = PHI <..., reduc_2>
1403 if (...)
1404 reduc_3 = ...
1405 reduc_2 = PHI <reduc_1, reduc_3>
1407 ARG_0 and ARG_1 are correspondent PHI arguments.
1408 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1409 EXTENDED is true if PHI has > 2 arguments. */
1411 static bool
1412 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1413 tree *op0, tree *op1, bool extended)
1415 tree lhs, r_op1, r_op2;
1416 gimple *stmt;
1417 gimple *header_phi = NULL;
1418 enum tree_code reduction_op;
1419 basic_block bb = gimple_bb (phi);
1420 struct loop *loop = bb->loop_father;
1421 edge latch_e = loop_latch_edge (loop);
1422 imm_use_iterator imm_iter;
1423 use_operand_p use_p;
1424 edge e;
1425 edge_iterator ei;
1426 bool result = false;
1427 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1428 return false;
1430 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1432 lhs = arg_1;
1433 header_phi = SSA_NAME_DEF_STMT (arg_0);
1434 stmt = SSA_NAME_DEF_STMT (arg_1);
1436 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1438 lhs = arg_0;
1439 header_phi = SSA_NAME_DEF_STMT (arg_1);
1440 stmt = SSA_NAME_DEF_STMT (arg_0);
1442 else
1443 return false;
1444 if (gimple_bb (header_phi) != loop->header)
1445 return false;
1447 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1448 return false;
1450 if (gimple_code (stmt) != GIMPLE_ASSIGN
1451 || gimple_has_volatile_ops (stmt))
1452 return false;
1454 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1455 return false;
1457 if (!is_predicated (gimple_bb (stmt)))
1458 return false;
1460 /* Check that stmt-block is predecessor of phi-block. */
1461 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1462 if (e->dest == bb)
1464 result = true;
1465 break;
1467 if (!result)
1468 return false;
1470 if (!has_single_use (lhs))
1471 return false;
1473 reduction_op = gimple_assign_rhs_code (stmt);
1474 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1475 return false;
1476 r_op1 = gimple_assign_rhs1 (stmt);
1477 r_op2 = gimple_assign_rhs2 (stmt);
1479 /* Make R_OP1 to hold reduction variable. */
1480 if (r_op2 == PHI_RESULT (header_phi)
1481 && reduction_op == PLUS_EXPR)
1482 std::swap (r_op1, r_op2);
1483 else if (r_op1 != PHI_RESULT (header_phi))
1484 return false;
1486 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1487 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1489 gimple *use_stmt = USE_STMT (use_p);
1490 if (is_gimple_debug (use_stmt))
1491 continue;
1492 if (use_stmt == stmt)
1493 continue;
1494 if (gimple_code (use_stmt) != GIMPLE_PHI)
1495 return false;
1498 *op0 = r_op1; *op1 = r_op2;
1499 *reduc = stmt;
1500 return true;
1503 /* Converts conditional scalar reduction into unconditional form, e.g.
1504 bb_4
1505 if (_5 != 0) goto bb_5 else goto bb_6
1506 end_bb_4
1507 bb_5
1508 res_6 = res_13 + 1;
1509 end_bb_5
1510 bb_6
1511 # res_2 = PHI <res_13(4), res_6(5)>
1512 end_bb_6
1514 will be converted into sequence
1515 _ifc__1 = _5 != 0 ? 1 : 0;
1516 res_2 = res_13 + _ifc__1;
1517 Argument SWAP tells that arguments of conditional expression should be
1518 swapped.
1519 Returns rhs of resulting PHI assignment. */
1521 static tree
1522 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1523 tree cond, tree op0, tree op1, bool swap)
1525 gimple_stmt_iterator stmt_it;
1526 gimple *new_assign;
1527 tree rhs;
1528 tree rhs1 = gimple_assign_rhs1 (reduc);
1529 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1530 tree c;
1531 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1533 if (dump_file && (dump_flags & TDF_DETAILS))
1535 fprintf (dump_file, "Found cond scalar reduction.\n");
1536 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1539 /* Build cond expression using COND and constant operand
1540 of reduction rhs. */
1541 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1542 unshare_expr (cond),
1543 swap ? zero : op1,
1544 swap ? op1 : zero);
1546 /* Create assignment stmt and insert it at GSI. */
1547 new_assign = gimple_build_assign (tmp, c);
1548 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1549 /* Build rhs for unconditional increment/decrement. */
1550 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1551 TREE_TYPE (rhs1), op0, tmp);
1553 /* Delete original reduction stmt. */
1554 stmt_it = gsi_for_stmt (reduc);
1555 gsi_remove (&stmt_it, true);
1556 release_defs (reduc);
1557 return rhs;
1560 /* Produce condition for all occurrences of ARG in PHI node. */
1562 static tree
1563 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1564 gimple_stmt_iterator *gsi)
1566 int len;
1567 int i;
1568 tree cond = NULL_TREE;
1569 tree c;
1570 edge e;
1572 len = occur->length ();
1573 gcc_assert (len > 0);
1574 for (i = 0; i < len; i++)
1576 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1577 c = bb_predicate (e->src);
1578 if (is_true_predicate (c))
1579 continue;
1580 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1581 is_gimple_condexpr, NULL_TREE,
1582 true, GSI_SAME_STMT);
1583 if (cond != NULL_TREE)
1585 /* Must build OR expression. */
1586 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1587 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1588 is_gimple_condexpr, NULL_TREE,
1589 true, GSI_SAME_STMT);
1591 else
1592 cond = c;
1594 gcc_assert (cond != NULL_TREE);
1595 return cond;
1598 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1599 This routine can handle PHI nodes with more than two arguments.
1601 For example,
1602 S1: A = PHI <x1(1), x2(5)>
1603 is converted into,
1604 S2: A = cond ? x1 : x2;
1606 The generated code is inserted at GSI that points to the top of
1607 basic block's statement list.
1608 If PHI node has more than two arguments a chain of conditional
1609 expression is produced. */
1612 static void
1613 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1615 gimple *new_stmt = NULL, *reduc;
1616 tree rhs, res, arg0, arg1, op0, op1, scev;
1617 tree cond;
1618 unsigned int index0;
1619 unsigned int max, args_len;
1620 edge e;
1621 basic_block bb;
1622 unsigned int i;
1624 res = gimple_phi_result (phi);
1625 if (virtual_operand_p (res))
1626 return;
1628 if ((rhs = degenerate_phi_result (phi))
1629 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1630 res))
1631 && !chrec_contains_undetermined (scev)
1632 && scev != res
1633 && (rhs = gimple_phi_arg_def (phi, 0))))
1635 if (dump_file && (dump_flags & TDF_DETAILS))
1637 fprintf (dump_file, "Degenerate phi!\n");
1638 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1640 new_stmt = gimple_build_assign (res, rhs);
1641 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1642 update_stmt (new_stmt);
1643 return;
1646 bb = gimple_bb (phi);
1647 if (EDGE_COUNT (bb->preds) == 2)
1649 /* Predicate ordinary PHI node with 2 arguments. */
1650 edge first_edge, second_edge;
1651 basic_block true_bb;
1652 first_edge = EDGE_PRED (bb, 0);
1653 second_edge = EDGE_PRED (bb, 1);
1654 cond = bb_predicate (first_edge->src);
1655 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1656 std::swap (first_edge, second_edge);
1657 if (EDGE_COUNT (first_edge->src->succs) > 1)
1659 cond = bb_predicate (second_edge->src);
1660 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1661 cond = TREE_OPERAND (cond, 0);
1662 else
1663 first_edge = second_edge;
1665 else
1666 cond = bb_predicate (first_edge->src);
1667 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1668 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1669 is_gimple_condexpr, NULL_TREE,
1670 true, GSI_SAME_STMT);
1671 true_bb = first_edge->src;
1672 if (EDGE_PRED (bb, 1)->src == true_bb)
1674 arg0 = gimple_phi_arg_def (phi, 1);
1675 arg1 = gimple_phi_arg_def (phi, 0);
1677 else
1679 arg0 = gimple_phi_arg_def (phi, 0);
1680 arg1 = gimple_phi_arg_def (phi, 1);
1682 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1683 &op0, &op1, false))
1684 /* Convert reduction stmt into vectorizable form. */
1685 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1686 true_bb != gimple_bb (reduc));
1687 else
1688 /* Build new RHS using selected condition and arguments. */
1689 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1690 arg0, arg1);
1691 new_stmt = gimple_build_assign (res, rhs);
1692 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1693 update_stmt (new_stmt);
1695 if (dump_file && (dump_flags & TDF_DETAILS))
1697 fprintf (dump_file, "new phi replacement stmt\n");
1698 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1700 return;
1703 /* Create hashmap for PHI node which contain vector of argument indexes
1704 having the same value. */
1705 bool swap = false;
1706 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1707 unsigned int num_args = gimple_phi_num_args (phi);
1708 int max_ind = -1;
1709 /* Vector of different PHI argument values. */
1710 auto_vec<tree> args (num_args);
1712 /* Compute phi_arg_map. */
1713 for (i = 0; i < num_args; i++)
1715 tree arg;
1717 arg = gimple_phi_arg_def (phi, i);
1718 if (!phi_arg_map.get (arg))
1719 args.quick_push (arg);
1720 phi_arg_map.get_or_insert (arg).safe_push (i);
1723 /* Determine element with max number of occurrences. */
1724 max_ind = -1;
1725 max = 1;
1726 args_len = args.length ();
1727 for (i = 0; i < args_len; i++)
1729 unsigned int len;
1730 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1732 max_ind = (int) i;
1733 max = len;
1737 /* Put element with max number of occurences to the end of ARGS. */
1738 if (max_ind != -1 && max_ind +1 != (int) args_len)
1739 std::swap (args[args_len - 1], args[max_ind]);
1741 /* Handle one special case when number of arguments with different values
1742 is equal 2 and one argument has the only occurrence. Such PHI can be
1743 handled as if would have only 2 arguments. */
1744 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1746 vec<int> *indexes;
1747 indexes = phi_arg_map.get (args[0]);
1748 index0 = (*indexes)[0];
1749 arg0 = args[0];
1750 arg1 = args[1];
1751 e = gimple_phi_arg_edge (phi, index0);
1752 cond = bb_predicate (e->src);
1753 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1755 swap = true;
1756 cond = TREE_OPERAND (cond, 0);
1758 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1759 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1760 is_gimple_condexpr, NULL_TREE,
1761 true, GSI_SAME_STMT);
1762 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1763 &op0, &op1, true)))
1764 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1765 swap? arg1 : arg0,
1766 swap? arg0 : arg1);
1767 else
1768 /* Convert reduction stmt into vectorizable form. */
1769 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1770 swap);
1771 new_stmt = gimple_build_assign (res, rhs);
1772 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1773 update_stmt (new_stmt);
1775 else
1777 /* Common case. */
1778 vec<int> *indexes;
1779 tree type = TREE_TYPE (gimple_phi_result (phi));
1780 tree lhs;
1781 arg1 = args[1];
1782 for (i = 0; i < args_len; i++)
1784 arg0 = args[i];
1785 indexes = phi_arg_map.get (args[i]);
1786 if (i != args_len - 1)
1787 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1788 else
1789 lhs = res;
1790 cond = gen_phi_arg_condition (phi, indexes, gsi);
1791 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1792 arg0, arg1);
1793 new_stmt = gimple_build_assign (lhs, rhs);
1794 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1795 update_stmt (new_stmt);
1796 arg1 = lhs;
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, "new extended phi replacement stmt\n");
1803 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1807 /* Replaces in LOOP all the scalar phi nodes other than those in the
1808 LOOP->header block with conditional modify expressions. */
1810 static void
1811 predicate_all_scalar_phis (struct loop *loop)
1813 basic_block bb;
1814 unsigned int orig_loop_num_nodes = loop->num_nodes;
1815 unsigned int i;
1817 for (i = 1; i < orig_loop_num_nodes; i++)
1819 gphi *phi;
1820 gimple_stmt_iterator gsi;
1821 gphi_iterator phi_gsi;
1822 bb = ifc_bbs[i];
1824 if (bb == loop->header)
1825 continue;
1827 if (EDGE_COUNT (bb->preds) == 1)
1828 continue;
1830 phi_gsi = gsi_start_phis (bb);
1831 if (gsi_end_p (phi_gsi))
1832 continue;
1834 gsi = gsi_after_labels (bb);
1835 while (!gsi_end_p (phi_gsi))
1837 phi = phi_gsi.phi ();
1838 predicate_scalar_phi (phi, &gsi);
1839 release_phi_node (phi);
1840 gsi_next (&phi_gsi);
1843 set_phi_nodes (bb, NULL);
1847 /* Insert in each basic block of LOOP the statements produced by the
1848 gimplification of the predicates. */
1850 static void
1851 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1853 unsigned int i;
1855 for (i = 0; i < loop->num_nodes; i++)
1857 basic_block bb = ifc_bbs[i];
1858 gimple_seq stmts;
1859 if (!is_predicated (bb))
1860 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
1861 if (!is_predicated (bb))
1863 /* Do not insert statements for a basic block that is not
1864 predicated. Also make sure that the predicate of the
1865 basic block is set to true. */
1866 reset_bb_predicate (bb);
1867 continue;
1870 stmts = bb_predicate_gimplified_stmts (bb);
1871 if (stmts)
1873 if (flag_tree_loop_if_convert_stores
1874 || any_mask_load_store)
1876 /* Insert the predicate of the BB just after the label,
1877 as the if-conversion of memory writes will use this
1878 predicate. */
1879 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1880 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1882 else
1884 /* Insert the predicate of the BB at the end of the BB
1885 as this would reduce the register pressure: the only
1886 use of this predicate will be in successor BBs. */
1887 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1889 if (gsi_end_p (gsi)
1890 || stmt_ends_bb_p (gsi_stmt (gsi)))
1891 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1892 else
1893 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1896 /* Once the sequence is code generated, set it to NULL. */
1897 set_bb_predicate_gimplified_stmts (bb, NULL);
1902 /* Helper function for predicate_mem_writes. Returns index of existent
1903 mask if it was created for given SIZE and -1 otherwise. */
1905 static int
1906 mask_exists (int size, vec<int> vec)
1908 unsigned int ix;
1909 int v;
1910 FOR_EACH_VEC_ELT (vec, ix, v)
1911 if (v == size)
1912 return (int) ix;
1913 return -1;
1916 /* Predicate each write to memory in LOOP.
1918 This function transforms control flow constructs containing memory
1919 writes of the form:
1921 | for (i = 0; i < N; i++)
1922 | if (cond)
1923 | A[i] = expr;
1925 into the following form that does not contain control flow:
1927 | for (i = 0; i < N; i++)
1928 | A[i] = cond ? expr : A[i];
1930 The original CFG looks like this:
1932 | bb_0
1933 | i = 0
1934 | end_bb_0
1936 | bb_1
1937 | if (i < N) goto bb_5 else goto bb_2
1938 | end_bb_1
1940 | bb_2
1941 | cond = some_computation;
1942 | if (cond) goto bb_3 else goto bb_4
1943 | end_bb_2
1945 | bb_3
1946 | A[i] = expr;
1947 | goto bb_4
1948 | end_bb_3
1950 | bb_4
1951 | goto bb_1
1952 | end_bb_4
1954 insert_gimplified_predicates inserts the computation of the COND
1955 expression at the beginning of the destination basic block:
1957 | bb_0
1958 | i = 0
1959 | end_bb_0
1961 | bb_1
1962 | if (i < N) goto bb_5 else goto bb_2
1963 | end_bb_1
1965 | bb_2
1966 | cond = some_computation;
1967 | if (cond) goto bb_3 else goto bb_4
1968 | end_bb_2
1970 | bb_3
1971 | cond = some_computation;
1972 | A[i] = expr;
1973 | goto bb_4
1974 | end_bb_3
1976 | bb_4
1977 | goto bb_1
1978 | end_bb_4
1980 predicate_mem_writes is then predicating the memory write as follows:
1982 | bb_0
1983 | i = 0
1984 | end_bb_0
1986 | bb_1
1987 | if (i < N) goto bb_5 else goto bb_2
1988 | end_bb_1
1990 | bb_2
1991 | if (cond) goto bb_3 else goto bb_4
1992 | end_bb_2
1994 | bb_3
1995 | cond = some_computation;
1996 | A[i] = cond ? expr : A[i];
1997 | goto bb_4
1998 | end_bb_3
2000 | bb_4
2001 | goto bb_1
2002 | end_bb_4
2004 and finally combine_blocks removes the basic block boundaries making
2005 the loop vectorizable:
2007 | bb_0
2008 | i = 0
2009 | if (i < N) goto bb_5 else goto bb_1
2010 | end_bb_0
2012 | bb_1
2013 | cond = some_computation;
2014 | A[i] = cond ? expr : A[i];
2015 | if (i < N) goto bb_5 else goto bb_4
2016 | end_bb_1
2018 | bb_4
2019 | goto bb_1
2020 | end_bb_4
2023 static void
2024 predicate_mem_writes (loop_p loop)
2026 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2027 auto_vec<int, 1> vect_sizes;
2028 auto_vec<tree, 1> vect_masks;
2030 for (i = 1; i < orig_loop_num_nodes; i++)
2032 gimple_stmt_iterator gsi;
2033 basic_block bb = ifc_bbs[i];
2034 tree cond = bb_predicate (bb);
2035 bool swap;
2036 gimple *stmt;
2037 int index;
2039 if (is_true_predicate (cond))
2040 continue;
2042 swap = false;
2043 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2045 swap = true;
2046 cond = TREE_OPERAND (cond, 0);
2049 vect_sizes.truncate (0);
2050 vect_masks.truncate (0);
2052 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2053 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2054 continue;
2055 else if (gimple_plf (stmt, GF_PLF_2))
2057 tree lhs = gimple_assign_lhs (stmt);
2058 tree rhs = gimple_assign_rhs1 (stmt);
2059 tree ref, addr, ptr, mask;
2060 gimple *new_stmt;
2061 gimple_seq stmts = NULL;
2062 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
2063 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2064 mark_addressable (ref);
2065 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2066 true, NULL_TREE, true,
2067 GSI_SAME_STMT);
2068 if (!vect_sizes.is_empty ()
2069 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2070 /* Use created mask. */
2071 mask = vect_masks[index];
2072 else
2074 if (COMPARISON_CLASS_P (cond))
2075 mask = gimple_build (&stmts, TREE_CODE (cond),
2076 boolean_type_node,
2077 TREE_OPERAND (cond, 0),
2078 TREE_OPERAND (cond, 1));
2079 else
2081 gcc_assert (TREE_CODE (cond) == SSA_NAME);
2082 mask = cond;
2085 if (swap)
2087 tree true_val
2088 = constant_boolean_node (true, TREE_TYPE (mask));
2089 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2090 TREE_TYPE (mask), mask, true_val);
2092 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2094 mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
2095 /* Save mask and its size for further use. */
2096 vect_sizes.safe_push (bitsize);
2097 vect_masks.safe_push (mask);
2099 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
2100 /* Copy points-to info if possible. */
2101 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2102 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2103 ref);
2104 if (TREE_CODE (lhs) == SSA_NAME)
2106 new_stmt
2107 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2108 ptr, mask);
2109 gimple_call_set_lhs (new_stmt, lhs);
2111 else
2112 new_stmt
2113 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2114 mask, rhs);
2115 gsi_replace (&gsi, new_stmt, true);
2117 else if (gimple_vdef (stmt))
2119 tree lhs = gimple_assign_lhs (stmt);
2120 tree rhs = gimple_assign_rhs1 (stmt);
2121 tree type = TREE_TYPE (lhs);
2123 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2124 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2125 if (swap)
2126 std::swap (lhs, rhs);
2127 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2128 is_gimple_condexpr, NULL_TREE,
2129 true, GSI_SAME_STMT);
2130 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2131 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2132 update_stmt (stmt);
2137 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2138 other than the exit and latch of the LOOP. Also resets the
2139 GIMPLE_DEBUG information. */
2141 static void
2142 remove_conditions_and_labels (loop_p loop)
2144 gimple_stmt_iterator gsi;
2145 unsigned int i;
2147 for (i = 0; i < loop->num_nodes; i++)
2149 basic_block bb = ifc_bbs[i];
2151 if (bb_with_exit_edge_p (loop, bb)
2152 || bb == loop->latch)
2153 continue;
2155 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2156 switch (gimple_code (gsi_stmt (gsi)))
2158 case GIMPLE_COND:
2159 case GIMPLE_LABEL:
2160 gsi_remove (&gsi, true);
2161 break;
2163 case GIMPLE_DEBUG:
2164 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2165 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2167 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2168 update_stmt (gsi_stmt (gsi));
2170 gsi_next (&gsi);
2171 break;
2173 default:
2174 gsi_next (&gsi);
2179 /* Combine all the basic blocks from LOOP into one or two super basic
2180 blocks. Replace PHI nodes with conditional modify expressions. */
2182 static void
2183 combine_blocks (struct loop *loop, bool any_mask_load_store)
2185 basic_block bb, exit_bb, merge_target_bb;
2186 unsigned int orig_loop_num_nodes = loop->num_nodes;
2187 unsigned int i;
2188 edge e;
2189 edge_iterator ei;
2191 predicate_bbs (loop);
2192 remove_conditions_and_labels (loop);
2193 insert_gimplified_predicates (loop, any_mask_load_store);
2194 predicate_all_scalar_phis (loop);
2196 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2197 predicate_mem_writes (loop);
2199 /* Merge basic blocks: first remove all the edges in the loop,
2200 except for those from the exit block. */
2201 exit_bb = NULL;
2202 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2203 for (i = 0; i < orig_loop_num_nodes; i++)
2205 bb = ifc_bbs[i];
2206 predicated[i] = !is_true_predicate (bb_predicate (bb));
2207 free_bb_predicate (bb);
2208 if (bb_with_exit_edge_p (loop, bb))
2210 gcc_assert (exit_bb == NULL);
2211 exit_bb = bb;
2214 gcc_assert (exit_bb != loop->latch);
2216 for (i = 1; i < orig_loop_num_nodes; i++)
2218 bb = ifc_bbs[i];
2220 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2222 if (e->src == exit_bb)
2223 ei_next (&ei);
2224 else
2225 remove_edge (e);
2229 if (exit_bb != NULL)
2231 if (exit_bb != loop->header)
2233 /* Connect this node to loop header. */
2234 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2235 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2238 /* Redirect non-exit edges to loop->latch. */
2239 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2241 if (!loop_exit_edge_p (loop, e))
2242 redirect_edge_and_branch (e, loop->latch);
2244 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2246 else
2248 /* If the loop does not have an exit, reconnect header and latch. */
2249 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2250 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2253 merge_target_bb = loop->header;
2254 for (i = 1; i < orig_loop_num_nodes; i++)
2256 gimple_stmt_iterator gsi;
2257 gimple_stmt_iterator last;
2259 bb = ifc_bbs[i];
2261 if (bb == exit_bb || bb == loop->latch)
2262 continue;
2264 /* Make stmts member of loop->header and clear range info from all stmts
2265 in BB which is now no longer executed conditional on a predicate we
2266 could have derived it from. */
2267 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2269 gimple *stmt = gsi_stmt (gsi);
2270 gimple_set_bb (stmt, merge_target_bb);
2271 if (predicated[i])
2273 ssa_op_iter i;
2274 tree op;
2275 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2276 reset_flow_sensitive_info (op);
2280 /* Update stmt list. */
2281 last = gsi_last_bb (merge_target_bb);
2282 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2283 set_bb_seq (bb, NULL);
2285 delete_basic_block (bb);
2288 /* If possible, merge loop header to the block with the exit edge.
2289 This reduces the number of basic blocks to two, to please the
2290 vectorizer that handles only loops with two nodes. */
2291 if (exit_bb
2292 && exit_bb != loop->header
2293 && can_merge_blocks_p (loop->header, exit_bb))
2294 merge_blocks (loop->header, exit_bb);
2296 free (ifc_bbs);
2297 ifc_bbs = NULL;
2298 free (predicated);
2301 /* Version LOOP before if-converting it; the original loop
2302 will be if-converted, the new copy of the loop will not,
2303 and the LOOP_VECTORIZED internal call will be guarding which
2304 loop to execute. The vectorizer pass will fold this
2305 internal call into either true or false. */
2307 static bool
2308 version_loop_for_if_conversion (struct loop *loop)
2310 basic_block cond_bb;
2311 tree cond = make_ssa_name (boolean_type_node);
2312 struct loop *new_loop;
2313 gimple *g;
2314 gimple_stmt_iterator gsi;
2316 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2317 build_int_cst (integer_type_node, loop->num),
2318 integer_zero_node);
2319 gimple_call_set_lhs (g, cond);
2321 initialize_original_copy_tables ();
2322 new_loop = loop_version (loop, cond, &cond_bb,
2323 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2324 REG_BR_PROB_BASE, true);
2325 free_original_copy_tables ();
2326 if (new_loop == NULL)
2327 return false;
2328 new_loop->dont_vectorize = true;
2329 new_loop->force_vectorize = false;
2330 gsi = gsi_last_bb (cond_bb);
2331 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2332 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2333 update_ssa (TODO_update_ssa);
2334 return true;
2337 /* Performs splitting of critical edges if aggressive_if_conv is true.
2338 Returns false if loop won't be if converted and true otherwise. */
2340 static bool
2341 ifcvt_split_critical_edges (struct loop *loop)
2343 basic_block *body;
2344 basic_block bb;
2345 unsigned int num = loop->num_nodes;
2346 unsigned int i;
2347 gimple *stmt;
2348 edge e;
2349 edge_iterator ei;
2351 if (num <= 2)
2352 return false;
2353 if (loop->inner)
2354 return false;
2355 if (!single_exit (loop))
2356 return false;
2358 body = get_loop_body (loop);
2359 for (i = 0; i < num; i++)
2361 bb = body[i];
2362 if (bb == loop->latch
2363 || bb_with_exit_edge_p (loop, bb))
2364 continue;
2365 stmt = last_stmt (bb);
2366 /* Skip basic blocks not ending with conditional branch. */
2367 if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
2368 continue;
2369 FOR_EACH_EDGE (e, ei, bb->succs)
2370 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2371 split_edge (e);
2373 free (body);
2374 return true;
2377 /* Assumes that lhs of DEF_STMT have multiple uses.
2378 Delete one use by (1) creation of copy DEF_STMT with
2379 unique lhs; (2) change original use of lhs in one
2380 use statement with newly created lhs. */
2382 static void
2383 ifcvt_split_def_stmt (gimple *def_stmt, gimple *use_stmt)
2385 tree var;
2386 tree lhs;
2387 gimple *copy_stmt;
2388 gimple_stmt_iterator gsi;
2389 use_operand_p use_p;
2390 imm_use_iterator imm_iter;
2392 var = gimple_assign_lhs (def_stmt);
2393 copy_stmt = gimple_copy (def_stmt);
2394 lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
2395 gimple_assign_set_lhs (copy_stmt, lhs);
2396 SSA_NAME_DEF_STMT (lhs) = copy_stmt;
2397 /* Insert copy of DEF_STMT. */
2398 gsi = gsi_for_stmt (def_stmt);
2399 gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
2400 /* Change use of var to lhs in use_stmt. */
2401 if (dump_file && (dump_flags & TDF_DETAILS))
2403 fprintf (dump_file, "Change use of var ");
2404 print_generic_expr (dump_file, var, TDF_SLIM);
2405 fprintf (dump_file, " to ");
2406 print_generic_expr (dump_file, lhs, TDF_SLIM);
2407 fprintf (dump_file, "\n");
2409 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2411 if (USE_STMT (use_p) != use_stmt)
2412 continue;
2413 SET_USE (use_p, lhs);
2414 break;
2418 /* Traverse bool pattern recursively starting from VAR.
2419 Save its def and use statements to defuse_list if VAR does
2420 not have single use. */
2422 static void
2423 ifcvt_walk_pattern_tree (tree var, vec<gimple *> *defuse_list,
2424 gimple *use_stmt)
2426 tree rhs1, rhs2;
2427 enum tree_code code;
2428 gimple *def_stmt;
2430 def_stmt = SSA_NAME_DEF_STMT (var);
2431 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2432 return;
2433 if (!has_single_use (var))
2435 /* Put def and use stmts into defuse_list. */
2436 defuse_list->safe_push (def_stmt);
2437 defuse_list->safe_push (use_stmt);
2438 if (dump_file && (dump_flags & TDF_DETAILS))
2440 fprintf (dump_file, "Multiple lhs uses in stmt\n");
2441 print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
2444 rhs1 = gimple_assign_rhs1 (def_stmt);
2445 code = gimple_assign_rhs_code (def_stmt);
2446 switch (code)
2448 case SSA_NAME:
2449 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2450 break;
2451 CASE_CONVERT:
2452 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2453 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2454 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2455 break;
2456 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2457 break;
2458 case BIT_NOT_EXPR:
2459 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2460 break;
2461 case BIT_AND_EXPR:
2462 case BIT_IOR_EXPR:
2463 case BIT_XOR_EXPR:
2464 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2465 rhs2 = gimple_assign_rhs2 (def_stmt);
2466 ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
2467 break;
2468 default:
2469 break;
2471 return;
2474 /* Returns true if STMT can be a root of bool pattern applied
2475 by vectorizer. */
2477 static bool
2478 stmt_is_root_of_bool_pattern (gimple *stmt)
2480 enum tree_code code;
2481 tree lhs, rhs;
2483 code = gimple_assign_rhs_code (stmt);
2484 if (CONVERT_EXPR_CODE_P (code))
2486 lhs = gimple_assign_lhs (stmt);
2487 rhs = gimple_assign_rhs1 (stmt);
2488 if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2489 return false;
2490 if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
2491 return false;
2492 return true;
2494 else if (code == COND_EXPR)
2496 rhs = gimple_assign_rhs1 (stmt);
2497 if (TREE_CODE (rhs) != SSA_NAME)
2498 return false;
2499 return true;
2501 return false;
2504 /* Traverse all statements in BB which correspond to loop header to
2505 find out all statements which can start bool pattern applied by
2506 vectorizer and convert multiple uses in it to conform pattern
2507 restrictions. Such case can occur if the same predicate is used both
2508 for phi node conversion and load/store mask. */
2510 static void
2511 ifcvt_repair_bool_pattern (basic_block bb)
2513 tree rhs;
2514 gimple *stmt;
2515 gimple_stmt_iterator gsi;
2516 vec<gimple *> defuse_list = vNULL;
2517 vec<gimple *> pattern_roots = vNULL;
2518 bool repeat = true;
2519 int niter = 0;
2520 unsigned int ix;
2522 /* Collect all root pattern statements. */
2523 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2525 stmt = gsi_stmt (gsi);
2526 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2527 continue;
2528 if (!stmt_is_root_of_bool_pattern (stmt))
2529 continue;
2530 pattern_roots.safe_push (stmt);
2533 if (pattern_roots.is_empty ())
2534 return;
2536 /* Split all statements with multiple uses iteratively since splitting
2537 may create new multiple uses. */
2538 while (repeat)
2540 repeat = false;
2541 niter++;
2542 FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
2544 rhs = gimple_assign_rhs1 (stmt);
2545 ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
2546 while (defuse_list.length () > 0)
2548 repeat = true;
2549 gimple *def_stmt, *use_stmt;
2550 use_stmt = defuse_list.pop ();
2551 def_stmt = defuse_list.pop ();
2552 ifcvt_split_def_stmt (def_stmt, use_stmt);
2557 if (dump_file && (dump_flags & TDF_DETAILS))
2558 fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
2559 niter);
2562 /* Delete redundant statements produced by predication which prevents
2563 loop vectorization. */
2565 static void
2566 ifcvt_local_dce (basic_block bb)
2568 gimple *stmt;
2569 gimple *stmt1;
2570 gimple *phi;
2571 gimple_stmt_iterator gsi;
2572 vec<gimple *> worklist;
2573 enum gimple_code code;
2574 use_operand_p use_p;
2575 imm_use_iterator imm_iter;
2577 worklist.create (64);
2578 /* Consider all phi as live statements. */
2579 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2581 phi = gsi_stmt (gsi);
2582 gimple_set_plf (phi, GF_PLF_2, true);
2583 worklist.safe_push (phi);
2585 /* Consider load/store statements, CALL and COND as live. */
2586 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2588 stmt = gsi_stmt (gsi);
2589 if (gimple_store_p (stmt)
2590 || gimple_assign_load_p (stmt)
2591 || is_gimple_debug (stmt))
2593 gimple_set_plf (stmt, GF_PLF_2, true);
2594 worklist.safe_push (stmt);
2595 continue;
2597 code = gimple_code (stmt);
2598 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2600 gimple_set_plf (stmt, GF_PLF_2, true);
2601 worklist.safe_push (stmt);
2602 continue;
2604 gimple_set_plf (stmt, GF_PLF_2, false);
2606 if (code == GIMPLE_ASSIGN)
2608 tree lhs = gimple_assign_lhs (stmt);
2609 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2611 stmt1 = USE_STMT (use_p);
2612 if (gimple_bb (stmt1) != bb)
2614 gimple_set_plf (stmt, GF_PLF_2, true);
2615 worklist.safe_push (stmt);
2616 break;
2621 /* Propagate liveness through arguments of live stmt. */
2622 while (worklist.length () > 0)
2624 ssa_op_iter iter;
2625 use_operand_p use_p;
2626 tree use;
2628 stmt = worklist.pop ();
2629 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2631 use = USE_FROM_PTR (use_p);
2632 if (TREE_CODE (use) != SSA_NAME)
2633 continue;
2634 stmt1 = SSA_NAME_DEF_STMT (use);
2635 if (gimple_bb (stmt1) != bb
2636 || gimple_plf (stmt1, GF_PLF_2))
2637 continue;
2638 gimple_set_plf (stmt1, GF_PLF_2, true);
2639 worklist.safe_push (stmt1);
2642 /* Delete dead statements. */
2643 gsi = gsi_start_bb (bb);
2644 while (!gsi_end_p (gsi))
2646 stmt = gsi_stmt (gsi);
2647 if (gimple_plf (stmt, GF_PLF_2))
2649 gsi_next (&gsi);
2650 continue;
2652 if (dump_file && (dump_flags & TDF_DETAILS))
2654 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2655 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2657 gsi_remove (&gsi, true);
2658 release_defs (stmt);
2662 /* If-convert LOOP when it is legal. For the moment this pass has no
2663 profitability analysis. Returns non-zero todo flags when something
2664 changed. */
2666 static unsigned int
2667 tree_if_conversion (struct loop *loop)
2669 unsigned int todo = 0;
2670 ifc_bbs = NULL;
2671 bool any_mask_load_store = false;
2673 /* Set up aggressive if-conversion for loops marked with simd pragma. */
2674 aggressive_if_conv = loop->force_vectorize;
2675 /* Check either outer loop was marked with simd pragma. */
2676 if (!aggressive_if_conv)
2678 struct loop *outer_loop = loop_outer (loop);
2679 if (outer_loop && outer_loop->force_vectorize)
2680 aggressive_if_conv = true;
2683 if (aggressive_if_conv)
2684 if (!ifcvt_split_critical_edges (loop))
2685 goto cleanup;
2687 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2688 || !dbg_cnt (if_conversion_tree))
2689 goto cleanup;
2691 if (any_mask_load_store
2692 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2693 || loop->dont_vectorize))
2694 goto cleanup;
2696 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2697 goto cleanup;
2699 /* Now all statements are if-convertible. Combine all the basic
2700 blocks into one huge basic block doing the if-conversion
2701 on-the-fly. */
2702 combine_blocks (loop, any_mask_load_store);
2704 /* Delete dead predicate computations and repair tree correspondent
2705 to bool pattern to delete multiple uses of predicates. */
2706 if (aggressive_if_conv)
2708 ifcvt_local_dce (loop->header);
2709 ifcvt_repair_bool_pattern (loop->header);
2712 todo |= TODO_cleanup_cfg;
2713 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2715 mark_virtual_operands_for_renaming (cfun);
2716 todo |= TODO_update_ssa_only_virtuals;
2719 cleanup:
2720 if (ifc_bbs)
2722 unsigned int i;
2724 for (i = 0; i < loop->num_nodes; i++)
2725 free_bb_predicate (ifc_bbs[i]);
2727 free (ifc_bbs);
2728 ifc_bbs = NULL;
2730 free_dominance_info (CDI_POST_DOMINATORS);
2732 return todo;
2735 /* Tree if-conversion pass management. */
2737 namespace {
2739 const pass_data pass_data_if_conversion =
2741 GIMPLE_PASS, /* type */
2742 "ifcvt", /* name */
2743 OPTGROUP_NONE, /* optinfo_flags */
2744 TV_NONE, /* tv_id */
2745 ( PROP_cfg | PROP_ssa ), /* properties_required */
2746 0, /* properties_provided */
2747 0, /* properties_destroyed */
2748 0, /* todo_flags_start */
2749 0, /* todo_flags_finish */
2752 class pass_if_conversion : public gimple_opt_pass
2754 public:
2755 pass_if_conversion (gcc::context *ctxt)
2756 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2759 /* opt_pass methods: */
2760 virtual bool gate (function *);
2761 virtual unsigned int execute (function *);
2763 }; // class pass_if_conversion
2765 bool
2766 pass_if_conversion::gate (function *fun)
2768 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2769 && flag_tree_loop_if_convert != 0)
2770 || flag_tree_loop_if_convert == 1
2771 || flag_tree_loop_if_convert_stores == 1);
2774 unsigned int
2775 pass_if_conversion::execute (function *fun)
2777 struct loop *loop;
2778 unsigned todo = 0;
2780 if (number_of_loops (fun) <= 1)
2781 return 0;
2783 FOR_EACH_LOOP (loop, 0)
2784 if (flag_tree_loop_if_convert == 1
2785 || flag_tree_loop_if_convert_stores == 1
2786 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2787 && !loop->dont_vectorize))
2788 todo |= tree_if_conversion (loop);
2790 if (flag_checking)
2792 basic_block bb;
2793 FOR_EACH_BB_FN (bb, fun)
2794 gcc_assert (!bb->aux);
2797 return todo;
2800 } // anon namespace
2802 gimple_opt_pass *
2803 make_pass_if_conversion (gcc::context *ctxt)
2805 return new pass_if_conversion (ctxt);