[AArch64] PR target/68129: Define TARGET_SUPPORTS_WIDE_INT
[official-gcc.git] / gcc / tree-if-conv.c
blob61ec39040f6ac0c15de58ebc93148f29d33f9337
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop-ivopts.h"
110 #include "tree-ssa-address.h"
111 #include "dbgcnt.h"
112 #include "tree-hash-traits.h"
114 /* List of basic blocks in if-conversion-suitable order. */
115 static basic_block *ifc_bbs;
117 /* Apply more aggressive (extended) if-conversion if true. */
118 static bool aggressive_if_conv;
120 /* Structure used to predicate basic blocks. This is attached to the
121 ->aux field of the BBs in the loop to be if-converted. */
122 struct bb_predicate {
124 /* The condition under which this basic block is executed. */
125 tree predicate;
127 /* PREDICATE is gimplified, and the sequence of statements is
128 recorded here, in order to avoid the duplication of computations
129 that occur in previous conditions. See PR44483. */
130 gimple_seq predicate_gimplified_stmts;
133 /* Returns true when the basic block BB has a predicate. */
135 static inline bool
136 bb_has_predicate (basic_block bb)
138 return bb->aux != NULL;
141 /* Returns the gimplified predicate for basic block BB. */
143 static inline tree
144 bb_predicate (basic_block bb)
146 return ((struct bb_predicate *) bb->aux)->predicate;
149 /* Sets the gimplified predicate COND for basic block BB. */
151 static inline void
152 set_bb_predicate (basic_block bb, tree cond)
154 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
155 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
156 || is_gimple_condexpr (cond));
157 ((struct bb_predicate *) bb->aux)->predicate = cond;
160 /* Returns the sequence of statements of the gimplification of the
161 predicate for basic block BB. */
163 static inline gimple_seq
164 bb_predicate_gimplified_stmts (basic_block bb)
166 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
169 /* Sets the sequence of statements STMTS of the gimplification of the
170 predicate for basic block BB. */
172 static inline void
173 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
175 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
178 /* Adds the sequence of statements STMTS to the sequence of statements
179 of the predicate for basic block BB. */
181 static inline void
182 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
184 gimple_seq_add_seq
185 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
188 /* Initializes to TRUE the predicate of basic block BB. */
190 static inline void
191 init_bb_predicate (basic_block bb)
193 bb->aux = XNEW (struct bb_predicate);
194 set_bb_predicate_gimplified_stmts (bb, NULL);
195 set_bb_predicate (bb, boolean_true_node);
198 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
199 but don't actually free it. */
201 static inline void
202 release_bb_predicate (basic_block bb)
204 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
205 if (stmts)
207 gimple_stmt_iterator i;
209 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
210 free_stmt_operands (cfun, gsi_stmt (i));
211 set_bb_predicate_gimplified_stmts (bb, NULL);
215 /* Free the predicate of basic block BB. */
217 static inline void
218 free_bb_predicate (basic_block bb)
220 if (!bb_has_predicate (bb))
221 return;
223 release_bb_predicate (bb);
224 free (bb->aux);
225 bb->aux = NULL;
228 /* Reinitialize predicate of BB with the true predicate. */
230 static inline void
231 reset_bb_predicate (basic_block bb)
233 if (!bb_has_predicate (bb))
234 init_bb_predicate (bb);
235 else
237 release_bb_predicate (bb);
238 set_bb_predicate (bb, boolean_true_node);
242 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
243 the expression EXPR. Inserts the statement created for this
244 computation before GSI and leaves the iterator GSI at the same
245 statement. */
247 static tree
248 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
250 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
251 gimple *stmt = gimple_build_assign (new_name, expr);
252 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
253 return new_name;
256 /* Return true when COND is a true predicate. */
258 static inline bool
259 is_true_predicate (tree cond)
261 return (cond == NULL_TREE
262 || cond == boolean_true_node
263 || integer_onep (cond));
266 /* Returns true when BB has a predicate that is not trivial: true or
267 NULL_TREE. */
269 static inline bool
270 is_predicated (basic_block bb)
272 return !is_true_predicate (bb_predicate (bb));
275 /* Parses the predicate COND and returns its comparison code and
276 operands OP0 and OP1. */
278 static enum tree_code
279 parse_predicate (tree cond, tree *op0, tree *op1)
281 gimple *s;
283 if (TREE_CODE (cond) == SSA_NAME
284 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
286 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
288 *op0 = gimple_assign_rhs1 (s);
289 *op1 = gimple_assign_rhs2 (s);
290 return gimple_assign_rhs_code (s);
293 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
295 tree op = gimple_assign_rhs1 (s);
296 tree type = TREE_TYPE (op);
297 enum tree_code code = parse_predicate (op, op0, op1);
299 return code == ERROR_MARK ? ERROR_MARK
300 : invert_tree_comparison (code, HONOR_NANS (type));
303 return ERROR_MARK;
306 if (COMPARISON_CLASS_P (cond))
308 *op0 = TREE_OPERAND (cond, 0);
309 *op1 = TREE_OPERAND (cond, 1);
310 return TREE_CODE (cond);
313 return ERROR_MARK;
316 /* Returns the fold of predicate C1 OR C2 at location LOC. */
318 static tree
319 fold_or_predicates (location_t loc, tree c1, tree c2)
321 tree op1a, op1b, op2a, op2b;
322 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
323 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
325 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
327 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
328 code2, op2a, op2b);
329 if (t)
330 return t;
333 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
336 /* Returns true if N is either a constant or a SSA_NAME. */
338 static bool
339 constant_or_ssa_name (tree n)
341 switch (TREE_CODE (n))
343 case SSA_NAME:
344 case INTEGER_CST:
345 case REAL_CST:
346 case COMPLEX_CST:
347 case VECTOR_CST:
348 return true;
349 default:
350 return false;
354 /* Returns either a COND_EXPR or the folded expression if the folded
355 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
356 a constant or a SSA_NAME. */
358 static tree
359 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
361 tree rhs1, lhs1, cond_expr;
363 /* If COND is comparison r != 0 and r has boolean type, convert COND
364 to SSA_NAME to accept by vect bool pattern. */
365 if (TREE_CODE (cond) == NE_EXPR)
367 tree op0 = TREE_OPERAND (cond, 0);
368 tree op1 = TREE_OPERAND (cond, 1);
369 if (TREE_CODE (op0) == SSA_NAME
370 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
371 && (integer_zerop (op1)))
372 cond = op0;
374 cond_expr = fold_ternary (COND_EXPR, type, cond,
375 rhs, lhs);
377 if (cond_expr == NULL_TREE)
378 return build3 (COND_EXPR, type, cond, rhs, lhs);
380 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
382 if (constant_or_ssa_name (cond_expr))
383 return cond_expr;
385 if (TREE_CODE (cond_expr) == ABS_EXPR)
387 rhs1 = TREE_OPERAND (cond_expr, 1);
388 STRIP_USELESS_TYPE_CONVERSION (rhs1);
389 if (constant_or_ssa_name (rhs1))
390 return build1 (ABS_EXPR, type, rhs1);
393 if (TREE_CODE (cond_expr) == MIN_EXPR
394 || TREE_CODE (cond_expr) == MAX_EXPR)
396 lhs1 = TREE_OPERAND (cond_expr, 0);
397 STRIP_USELESS_TYPE_CONVERSION (lhs1);
398 rhs1 = TREE_OPERAND (cond_expr, 1);
399 STRIP_USELESS_TYPE_CONVERSION (rhs1);
400 if (constant_or_ssa_name (rhs1)
401 && constant_or_ssa_name (lhs1))
402 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
404 return build3 (COND_EXPR, type, cond, rhs, lhs);
407 /* Add condition NC to the predicate list of basic block BB. LOOP is
408 the loop to be if-converted. Use predicate of cd-equivalent block
409 for join bb if it exists: we call basic blocks bb1 and bb2
410 cd-equivalent if they are executed under the same condition. */
412 static inline void
413 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
415 tree bc, *tp;
416 basic_block dom_bb;
418 if (is_true_predicate (nc))
419 return;
421 /* If dominance tells us this basic block is always executed,
422 don't record any predicates for it. */
423 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
424 return;
426 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
427 /* We use notion of cd equivalence to get simpler predicate for
428 join block, e.g. if join block has 2 predecessors with predicates
429 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
430 p1 & p2 | p1 & !p2. */
431 if (dom_bb != loop->header
432 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
434 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
435 bc = bb_predicate (dom_bb);
436 if (!is_true_predicate (bc))
437 set_bb_predicate (bb, bc);
438 else
439 gcc_assert (is_true_predicate (bb_predicate (bb)));
440 if (dump_file && (dump_flags & TDF_DETAILS))
441 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
442 dom_bb->index, bb->index);
443 return;
446 if (!is_predicated (bb))
447 bc = nc;
448 else
450 bc = bb_predicate (bb);
451 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
452 if (is_true_predicate (bc))
454 reset_bb_predicate (bb);
455 return;
459 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
460 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
461 tp = &TREE_OPERAND (bc, 0);
462 else
463 tp = &bc;
464 if (!is_gimple_condexpr (*tp))
466 gimple_seq stmts;
467 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
468 add_bb_predicate_gimplified_stmts (bb, stmts);
470 set_bb_predicate (bb, bc);
473 /* Add the condition COND to the previous condition PREV_COND, and add
474 this to the predicate list of the destination of edge E. LOOP is
475 the loop to be if-converted. */
477 static void
478 add_to_dst_predicate_list (struct loop *loop, edge e,
479 tree prev_cond, tree cond)
481 if (!flow_bb_inside_loop_p (loop, e->dest))
482 return;
484 if (!is_true_predicate (prev_cond))
485 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
486 prev_cond, cond);
488 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
489 add_to_predicate_list (loop, e->dest, cond);
492 /* Return true if one of the successor edges of BB exits LOOP. */
494 static bool
495 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
497 edge e;
498 edge_iterator ei;
500 FOR_EACH_EDGE (e, ei, bb->succs)
501 if (loop_exit_edge_p (loop, e))
502 return true;
504 return false;
507 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
508 and it belongs to basic block BB.
510 PHI is not if-convertible if:
511 - it has more than 2 arguments.
513 When the flag_tree_loop_if_convert_stores is not set, PHI is not
514 if-convertible if:
515 - a virtual PHI is immediately used in another PHI node,
516 - there is a virtual PHI in a BB other than the loop->header.
517 When the aggressive_if_conv is set, PHI can have more than
518 two arguments. */
520 static bool
521 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
522 bool any_mask_load_store)
524 if (dump_file && (dump_flags & TDF_DETAILS))
526 fprintf (dump_file, "-------------------------\n");
527 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
530 if (bb != loop->header)
532 if (gimple_phi_num_args (phi) != 2
533 && !aggressive_if_conv)
535 if (dump_file && (dump_flags & TDF_DETAILS))
536 fprintf (dump_file, "More than two phi node args.\n");
537 return false;
541 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
542 return true;
544 /* When the flag_tree_loop_if_convert_stores is not set, check
545 that there are no memory writes in the branches of the loop to be
546 if-converted. */
547 if (virtual_operand_p (gimple_phi_result (phi)))
549 imm_use_iterator imm_iter;
550 use_operand_p use_p;
552 if (bb != loop->header)
554 if (dump_file && (dump_flags & TDF_DETAILS))
555 fprintf (dump_file, "Virtual phi not on loop->header.\n");
556 return false;
559 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
561 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
562 && USE_STMT (use_p) != phi)
564 if (dump_file && (dump_flags & TDF_DETAILS))
565 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
566 return false;
571 return true;
574 /* Records the status of a data reference. This struct is attached to
575 each DR->aux field. */
577 struct ifc_dr {
578 /* -1 when not initialized, 0 when false, 1 when true. */
579 int written_at_least_once;
581 /* -1 when not initialized, 0 when false, 1 when true. */
582 int rw_unconditionally;
585 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
586 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
587 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
589 /* Returns true when the memory references of STMT are read or written
590 unconditionally. In other words, this function returns true when
591 for every data reference A in STMT there exist other accesses to
592 a data reference with the same base with predicates that add up (OR-up) to
593 the true predicate: this ensures that the data reference A is touched
594 (read or written) on every iteration of the if-converted loop. */
596 static bool
597 memrefs_read_or_written_unconditionally (gimple *stmt,
598 vec<data_reference_p> drs)
600 int i, j;
601 data_reference_p a, b;
602 tree ca = bb_predicate (gimple_bb (stmt));
604 for (i = 0; drs.iterate (i, &a); i++)
605 if (DR_STMT (a) == stmt)
607 bool found = false;
608 int x = DR_RW_UNCONDITIONALLY (a);
610 if (x == 0)
611 return false;
613 if (x == 1)
614 continue;
616 for (j = 0; drs.iterate (j, &b); j++)
618 tree ref_base_a = DR_REF (a);
619 tree ref_base_b = DR_REF (b);
621 if (DR_STMT (b) == stmt)
622 continue;
624 while (TREE_CODE (ref_base_a) == COMPONENT_REF
625 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
626 || TREE_CODE (ref_base_a) == REALPART_EXPR)
627 ref_base_a = TREE_OPERAND (ref_base_a, 0);
629 while (TREE_CODE (ref_base_b) == COMPONENT_REF
630 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
631 || TREE_CODE (ref_base_b) == REALPART_EXPR)
632 ref_base_b = TREE_OPERAND (ref_base_b, 0);
634 if (operand_equal_p (ref_base_a, ref_base_b, 0))
636 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
638 if (DR_RW_UNCONDITIONALLY (b) == 1
639 || is_true_predicate (cb)
640 || is_true_predicate (ca
641 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
643 DR_RW_UNCONDITIONALLY (a) = 1;
644 DR_RW_UNCONDITIONALLY (b) = 1;
645 found = true;
646 break;
651 if (!found)
653 DR_RW_UNCONDITIONALLY (a) = 0;
654 return false;
658 return true;
661 /* Returns true when the memory references of STMT are unconditionally
662 written. In other words, this function returns true when for every
663 data reference A written in STMT, there exist other writes to the
664 same data reference with predicates that add up (OR-up) to the true
665 predicate: this ensures that the data reference A is written on
666 every iteration of the if-converted loop. */
668 static bool
669 write_memrefs_written_at_least_once (gimple *stmt,
670 vec<data_reference_p> drs)
672 int i, j;
673 data_reference_p a, b;
674 tree ca = bb_predicate (gimple_bb (stmt));
676 for (i = 0; drs.iterate (i, &a); i++)
677 if (DR_STMT (a) == stmt
678 && DR_IS_WRITE (a))
680 bool found = false;
681 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
683 if (x == 0)
684 return false;
686 if (x == 1)
687 continue;
689 for (j = 0; drs.iterate (j, &b); j++)
690 if (DR_STMT (b) != stmt
691 && DR_IS_WRITE (b)
692 && same_data_refs_base_objects (a, b))
694 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
696 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
697 || is_true_predicate (cb)
698 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
699 ca, cb)))
701 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
702 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
703 found = true;
704 break;
708 if (!found)
710 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
711 return false;
715 return true;
718 /* Return true when the memory references of STMT won't trap in the
719 if-converted code. There are two things that we have to check for:
721 - writes to memory occur to writable memory: if-conversion of
722 memory writes transforms the conditional memory writes into
723 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
724 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
725 be executed at all in the original code, it may be a readonly
726 memory. To check that A is not const-qualified, we check that
727 there exists at least an unconditional write to A in the current
728 function.
730 - reads or writes to memory are valid memory accesses for every
731 iteration. To check that the memory accesses are correctly formed
732 and that we are allowed to read and write in these locations, we
733 check that the memory accesses to be if-converted occur at every
734 iteration unconditionally. */
736 static bool
737 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> refs)
739 return write_memrefs_written_at_least_once (stmt, refs)
740 && memrefs_read_or_written_unconditionally (stmt, refs);
743 /* Wrapper around gimple_could_trap_p refined for the needs of the
744 if-conversion. Try to prove that the memory accesses of STMT could
745 not trap in the innermost loop containing STMT. */
747 static bool
748 ifcvt_could_trap_p (gimple *stmt, vec<data_reference_p> refs)
750 if (gimple_vuse (stmt)
751 && !gimple_could_trap_p_1 (stmt, false, false)
752 && ifcvt_memrefs_wont_trap (stmt, refs))
753 return false;
755 return gimple_could_trap_p (stmt);
758 /* Return true if STMT could be converted into a masked load or store
759 (conditional load or store based on a mask computed from bb predicate). */
761 static bool
762 ifcvt_can_use_mask_load_store (gimple *stmt)
764 tree lhs, ref;
765 machine_mode mode;
766 basic_block bb = gimple_bb (stmt);
767 bool is_load;
769 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
770 || bb->loop_father->dont_vectorize
771 || !gimple_assign_single_p (stmt)
772 || gimple_has_volatile_ops (stmt))
773 return false;
775 /* Check whether this is a load or store. */
776 lhs = gimple_assign_lhs (stmt);
777 if (gimple_store_p (stmt))
779 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
780 return false;
781 is_load = false;
782 ref = lhs;
784 else if (gimple_assign_load_p (stmt))
786 is_load = true;
787 ref = gimple_assign_rhs1 (stmt);
789 else
790 return false;
792 if (may_be_nonaddressable_p (ref))
793 return false;
795 /* Mask should be integer mode of the same size as the load/store
796 mode. */
797 mode = TYPE_MODE (TREE_TYPE (lhs));
798 if (int_mode_for_mode (mode) == BLKmode
799 || VECTOR_MODE_P (mode))
800 return false;
802 if (can_vec_mask_load_store_p (mode, is_load))
803 return true;
805 return false;
808 /* Return true when STMT is if-convertible.
810 GIMPLE_ASSIGN statement is not if-convertible if,
811 - it is not movable,
812 - it could trap,
813 - LHS is not var decl. */
815 static bool
816 if_convertible_gimple_assign_stmt_p (gimple *stmt,
817 vec<data_reference_p> refs,
818 bool *any_mask_load_store)
820 tree lhs = gimple_assign_lhs (stmt);
821 basic_block bb;
823 if (dump_file && (dump_flags & TDF_DETAILS))
825 fprintf (dump_file, "-------------------------\n");
826 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
829 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
830 return false;
832 /* Some of these constrains might be too conservative. */
833 if (stmt_ends_bb_p (stmt)
834 || gimple_has_volatile_ops (stmt)
835 || (TREE_CODE (lhs) == SSA_NAME
836 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
837 || gimple_has_side_effects (stmt))
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "stmt not suitable for ifcvt\n");
841 return false;
844 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
845 in between if_convertible_loop_p and combine_blocks
846 we can perform loop versioning. */
847 gimple_set_plf (stmt, GF_PLF_2, false);
849 if (flag_tree_loop_if_convert_stores)
851 if (ifcvt_could_trap_p (stmt, refs))
853 if (ifcvt_can_use_mask_load_store (stmt))
855 gimple_set_plf (stmt, GF_PLF_2, true);
856 *any_mask_load_store = true;
857 return true;
859 if (dump_file && (dump_flags & TDF_DETAILS))
860 fprintf (dump_file, "tree could trap...\n");
861 return false;
863 return true;
866 if (ifcvt_could_trap_p (stmt, refs))
868 if (ifcvt_can_use_mask_load_store (stmt))
870 gimple_set_plf (stmt, GF_PLF_2, true);
871 *any_mask_load_store = true;
872 return true;
874 if (dump_file && (dump_flags & TDF_DETAILS))
875 fprintf (dump_file, "tree could trap...\n");
876 return false;
879 bb = gimple_bb (stmt);
881 if (TREE_CODE (lhs) != SSA_NAME
882 && bb != bb->loop_father->header
883 && !bb_with_exit_edge_p (bb->loop_father, bb))
885 if (ifcvt_can_use_mask_load_store (stmt))
887 gimple_set_plf (stmt, GF_PLF_2, true);
888 *any_mask_load_store = true;
889 return true;
891 if (dump_file && (dump_flags & TDF_DETAILS))
893 fprintf (dump_file, "LHS is not var\n");
894 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
896 return false;
899 return true;
902 /* Return true when STMT is if-convertible.
904 A statement is if-convertible if:
905 - it is an if-convertible GIMPLE_ASSIGN,
906 - it is a GIMPLE_LABEL or a GIMPLE_COND,
907 - it is builtins call. */
909 static bool
910 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs,
911 bool *any_mask_load_store)
913 switch (gimple_code (stmt))
915 case GIMPLE_LABEL:
916 case GIMPLE_DEBUG:
917 case GIMPLE_COND:
918 return true;
920 case GIMPLE_ASSIGN:
921 return if_convertible_gimple_assign_stmt_p (stmt, refs,
922 any_mask_load_store);
924 case GIMPLE_CALL:
926 tree fndecl = gimple_call_fndecl (stmt);
927 if (fndecl)
929 int flags = gimple_call_flags (stmt);
930 if ((flags & ECF_CONST)
931 && !(flags & ECF_LOOPING_CONST_OR_PURE)
932 /* We can only vectorize some builtins at the moment,
933 so restrict if-conversion to those. */
934 && DECL_BUILT_IN (fndecl))
935 return true;
937 return false;
940 default:
941 /* Don't know what to do with 'em so don't do anything. */
942 if (dump_file && (dump_flags & TDF_DETAILS))
944 fprintf (dump_file, "don't know what to do\n");
945 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
947 return false;
948 break;
951 return true;
954 /* Assumes that BB has more than 1 predecessors.
955 Returns false if at least one successor is not on critical edge
956 and true otherwise. */
958 static inline bool
959 all_preds_critical_p (basic_block bb)
961 edge e;
962 edge_iterator ei;
964 FOR_EACH_EDGE (e, ei, bb->preds)
965 if (EDGE_COUNT (e->src->succs) == 1)
966 return false;
967 return true;
970 /* Returns true if at least one successor in on critical edge. */
971 static inline bool
972 has_pred_critical_p (basic_block bb)
974 edge e;
975 edge_iterator ei;
977 FOR_EACH_EDGE (e, ei, bb->preds)
978 if (EDGE_COUNT (e->src->succs) > 1)
979 return true;
980 return false;
983 /* Return true when BB is if-convertible. This routine does not check
984 basic block's statements and phis.
986 A basic block is not if-convertible if:
987 - it is non-empty and it is after the exit block (in BFS order),
988 - it is after the exit block but before the latch,
989 - its edges are not normal.
991 Last restriction is valid if aggressive_if_conv is false.
993 EXIT_BB is the basic block containing the exit of the LOOP. BB is
994 inside LOOP. */
996 static bool
997 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
999 edge e;
1000 edge_iterator ei;
1002 if (dump_file && (dump_flags & TDF_DETAILS))
1003 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1005 if (EDGE_COUNT (bb->succs) > 2)
1006 return false;
1008 if (EDGE_COUNT (bb->preds) > 2
1009 && !aggressive_if_conv)
1010 return false;
1012 if (exit_bb)
1014 if (bb != loop->latch)
1016 if (dump_file && (dump_flags & TDF_DETAILS))
1017 fprintf (dump_file, "basic block after exit bb but before latch\n");
1018 return false;
1020 else if (!empty_block_p (bb))
1022 if (dump_file && (dump_flags & TDF_DETAILS))
1023 fprintf (dump_file, "non empty basic block after exit bb\n");
1024 return false;
1026 else if (bb == loop->latch
1027 && bb != exit_bb
1028 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1030 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "latch is not dominated by exit_block\n");
1032 return false;
1036 /* Be less adventurous and handle only normal edges. */
1037 FOR_EACH_EDGE (e, ei, bb->succs)
1038 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1040 if (dump_file && (dump_flags & TDF_DETAILS))
1041 fprintf (dump_file, "Difficult to handle edges\n");
1042 return false;
1045 /* At least one incoming edge has to be non-critical as otherwise edge
1046 predicates are not equal to basic-block predicates of the edge
1047 source. This check is skipped if aggressive_if_conv is true. */
1048 if (!aggressive_if_conv
1049 && EDGE_COUNT (bb->preds) > 1
1050 && bb != loop->header
1051 && all_preds_critical_p (bb))
1053 if (dump_file && (dump_flags & TDF_DETAILS))
1054 fprintf (dump_file, "only critical predecessors\n");
1055 return false;
1058 return true;
1061 /* Return true when all predecessor blocks of BB are visited. The
1062 VISITED bitmap keeps track of the visited blocks. */
1064 static bool
1065 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1067 edge e;
1068 edge_iterator ei;
1069 FOR_EACH_EDGE (e, ei, bb->preds)
1070 if (!bitmap_bit_p (*visited, e->src->index))
1071 return false;
1073 return true;
1076 /* Get body of a LOOP in suitable order for if-conversion. It is
1077 caller's responsibility to deallocate basic block list.
1078 If-conversion suitable order is, breadth first sort (BFS) order
1079 with an additional constraint: select a block only if all its
1080 predecessors are already selected. */
1082 static basic_block *
1083 get_loop_body_in_if_conv_order (const struct loop *loop)
1085 basic_block *blocks, *blocks_in_bfs_order;
1086 basic_block bb;
1087 bitmap visited;
1088 unsigned int index = 0;
1089 unsigned int visited_count = 0;
1091 gcc_assert (loop->num_nodes);
1092 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1094 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1095 visited = BITMAP_ALLOC (NULL);
1097 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1099 index = 0;
1100 while (index < loop->num_nodes)
1102 bb = blocks_in_bfs_order [index];
1104 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1106 free (blocks_in_bfs_order);
1107 BITMAP_FREE (visited);
1108 free (blocks);
1109 return NULL;
1112 if (!bitmap_bit_p (visited, bb->index))
1114 if (pred_blocks_visited_p (bb, &visited)
1115 || bb == loop->header)
1117 /* This block is now visited. */
1118 bitmap_set_bit (visited, bb->index);
1119 blocks[visited_count++] = bb;
1123 index++;
1125 if (index == loop->num_nodes
1126 && visited_count != loop->num_nodes)
1127 /* Not done yet. */
1128 index = 0;
1130 free (blocks_in_bfs_order);
1131 BITMAP_FREE (visited);
1132 return blocks;
1135 /* Returns true when the analysis of the predicates for all the basic
1136 blocks in LOOP succeeded.
1138 predicate_bbs first allocates the predicates of the basic blocks.
1139 These fields are then initialized with the tree expressions
1140 representing the predicates under which a basic block is executed
1141 in the LOOP. As the loop->header is executed at each iteration, it
1142 has the "true" predicate. Other statements executed under a
1143 condition are predicated with that condition, for example
1145 | if (x)
1146 | S1;
1147 | else
1148 | S2;
1150 S1 will be predicated with "x", and
1151 S2 will be predicated with "!x". */
1153 static void
1154 predicate_bbs (loop_p loop)
1156 unsigned int i;
1158 for (i = 0; i < loop->num_nodes; i++)
1159 init_bb_predicate (ifc_bbs[i]);
1161 for (i = 0; i < loop->num_nodes; i++)
1163 basic_block bb = ifc_bbs[i];
1164 tree cond;
1165 gimple *stmt;
1167 /* The loop latch and loop exit block are always executed and
1168 have no extra conditions to be processed: skip them. */
1169 if (bb == loop->latch
1170 || bb_with_exit_edge_p (loop, bb))
1172 reset_bb_predicate (bb);
1173 continue;
1176 cond = bb_predicate (bb);
1177 stmt = last_stmt (bb);
1178 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1180 tree c2;
1181 edge true_edge, false_edge;
1182 location_t loc = gimple_location (stmt);
1183 tree c = build2_loc (loc, gimple_cond_code (stmt),
1184 boolean_type_node,
1185 gimple_cond_lhs (stmt),
1186 gimple_cond_rhs (stmt));
1188 /* Add new condition into destination's predicate list. */
1189 extract_true_false_edges_from_block (gimple_bb (stmt),
1190 &true_edge, &false_edge);
1192 /* If C is true, then TRUE_EDGE is taken. */
1193 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1194 unshare_expr (c));
1196 /* If C is false, then FALSE_EDGE is taken. */
1197 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1198 unshare_expr (c));
1199 add_to_dst_predicate_list (loop, false_edge,
1200 unshare_expr (cond), c2);
1202 cond = NULL_TREE;
1205 /* If current bb has only one successor, then consider it as an
1206 unconditional goto. */
1207 if (single_succ_p (bb))
1209 basic_block bb_n = single_succ (bb);
1211 /* The successor bb inherits the predicate of its
1212 predecessor. If there is no predicate in the predecessor
1213 bb, then consider the successor bb as always executed. */
1214 if (cond == NULL_TREE)
1215 cond = boolean_true_node;
1217 add_to_predicate_list (loop, bb_n, cond);
1221 /* The loop header is always executed. */
1222 reset_bb_predicate (loop->header);
1223 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1224 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1227 /* Return true when LOOP is if-convertible. This is a helper function
1228 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1229 in if_convertible_loop_p. */
1231 static bool
1232 if_convertible_loop_p_1 (struct loop *loop,
1233 vec<loop_p> *loop_nest,
1234 vec<data_reference_p> *refs,
1235 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1237 bool res;
1238 unsigned int i;
1239 basic_block exit_bb = NULL;
1241 /* Don't if-convert the loop when the data dependences cannot be
1242 computed: the loop won't be vectorized in that case. */
1243 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1244 if (!res)
1245 return false;
1247 calculate_dominance_info (CDI_DOMINATORS);
1248 calculate_dominance_info (CDI_POST_DOMINATORS);
1250 /* Allow statements that can be handled during if-conversion. */
1251 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1252 if (!ifc_bbs)
1254 if (dump_file && (dump_flags & TDF_DETAILS))
1255 fprintf (dump_file, "Irreducible loop\n");
1256 return false;
1259 for (i = 0; i < loop->num_nodes; i++)
1261 basic_block bb = ifc_bbs[i];
1263 if (!if_convertible_bb_p (loop, bb, exit_bb))
1264 return false;
1266 if (bb_with_exit_edge_p (loop, bb))
1267 exit_bb = bb;
1270 for (i = 0; i < loop->num_nodes; i++)
1272 basic_block bb = ifc_bbs[i];
1273 gimple_stmt_iterator gsi;
1275 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1276 switch (gimple_code (gsi_stmt (gsi)))
1278 case GIMPLE_LABEL:
1279 case GIMPLE_ASSIGN:
1280 case GIMPLE_CALL:
1281 case GIMPLE_DEBUG:
1282 case GIMPLE_COND:
1283 break;
1284 default:
1285 return false;
1289 data_reference_p dr;
1291 for (i = 0; refs->iterate (i, &dr); i++)
1293 dr->aux = XNEW (struct ifc_dr);
1294 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1295 DR_RW_UNCONDITIONALLY (dr) = -1;
1297 predicate_bbs (loop);
1299 for (i = 0; i < loop->num_nodes; i++)
1301 basic_block bb = ifc_bbs[i];
1302 gimple_stmt_iterator itr;
1304 /* Check the if-convertibility of statements in predicated BBs. */
1305 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1306 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1307 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1308 any_mask_load_store))
1309 return false;
1312 for (i = 0; i < loop->num_nodes; i++)
1313 free_bb_predicate (ifc_bbs[i]);
1315 /* Checking PHIs needs to be done after stmts, as the fact whether there
1316 are any masked loads or stores affects the tests. */
1317 for (i = 0; i < loop->num_nodes; i++)
1319 basic_block bb = ifc_bbs[i];
1320 gphi_iterator itr;
1322 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1323 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1324 *any_mask_load_store))
1325 return false;
1328 if (dump_file)
1329 fprintf (dump_file, "Applying if-conversion\n");
1331 return true;
1334 /* Return true when LOOP is if-convertible.
1335 LOOP is if-convertible if:
1336 - it is innermost,
1337 - it has two or more basic blocks,
1338 - it has only one exit,
1339 - loop header is not the exit edge,
1340 - if its basic blocks and phi nodes are if convertible. */
1342 static bool
1343 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1345 edge e;
1346 edge_iterator ei;
1347 bool res = false;
1348 vec<data_reference_p> refs;
1349 vec<ddr_p> ddrs;
1351 /* Handle only innermost loop. */
1352 if (!loop || loop->inner)
1354 if (dump_file && (dump_flags & TDF_DETAILS))
1355 fprintf (dump_file, "not innermost loop\n");
1356 return false;
1359 /* If only one block, no need for if-conversion. */
1360 if (loop->num_nodes <= 2)
1362 if (dump_file && (dump_flags & TDF_DETAILS))
1363 fprintf (dump_file, "less than 2 basic blocks\n");
1364 return false;
1367 /* More than one loop exit is too much to handle. */
1368 if (!single_exit (loop))
1370 if (dump_file && (dump_flags & TDF_DETAILS))
1371 fprintf (dump_file, "multiple exits\n");
1372 return false;
1375 /* If one of the loop header's edge is an exit edge then do not
1376 apply if-conversion. */
1377 FOR_EACH_EDGE (e, ei, loop->header->succs)
1378 if (loop_exit_edge_p (loop, e))
1379 return false;
1381 refs.create (5);
1382 ddrs.create (25);
1383 auto_vec<loop_p, 3> loop_nest;
1384 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1385 any_mask_load_store);
1387 data_reference_p dr;
1388 unsigned int i;
1389 for (i = 0; refs.iterate (i, &dr); i++)
1390 free (dr->aux);
1392 free_data_refs (refs);
1393 free_dependence_relations (ddrs);
1394 return res;
1397 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1398 which is in predicated basic block.
1399 In fact, the following PHI pattern is searching:
1400 loop-header:
1401 reduc_1 = PHI <..., reduc_2>
1403 if (...)
1404 reduc_3 = ...
1405 reduc_2 = PHI <reduc_1, reduc_3>
1407 ARG_0 and ARG_1 are correspondent PHI arguments.
1408 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1409 EXTENDED is true if PHI has > 2 arguments. */
1411 static bool
1412 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1413 tree *op0, tree *op1, bool extended)
1415 tree lhs, r_op1, r_op2;
1416 gimple *stmt;
1417 gimple *header_phi = NULL;
1418 enum tree_code reduction_op;
1419 basic_block bb = gimple_bb (phi);
1420 struct loop *loop = bb->loop_father;
1421 edge latch_e = loop_latch_edge (loop);
1422 imm_use_iterator imm_iter;
1423 use_operand_p use_p;
1424 edge e;
1425 edge_iterator ei;
1426 bool result = false;
1427 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1428 return false;
1430 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1432 lhs = arg_1;
1433 header_phi = SSA_NAME_DEF_STMT (arg_0);
1434 stmt = SSA_NAME_DEF_STMT (arg_1);
1436 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1438 lhs = arg_0;
1439 header_phi = SSA_NAME_DEF_STMT (arg_1);
1440 stmt = SSA_NAME_DEF_STMT (arg_0);
1442 else
1443 return false;
1444 if (gimple_bb (header_phi) != loop->header)
1445 return false;
1447 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1448 return false;
1450 if (gimple_code (stmt) != GIMPLE_ASSIGN
1451 || gimple_has_volatile_ops (stmt))
1452 return false;
1454 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1455 return false;
1457 if (!is_predicated (gimple_bb (stmt)))
1458 return false;
1460 /* Check that stmt-block is predecessor of phi-block. */
1461 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1462 if (e->dest == bb)
1464 result = true;
1465 break;
1467 if (!result)
1468 return false;
1470 if (!has_single_use (lhs))
1471 return false;
1473 reduction_op = gimple_assign_rhs_code (stmt);
1474 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1475 return false;
1476 r_op1 = gimple_assign_rhs1 (stmt);
1477 r_op2 = gimple_assign_rhs2 (stmt);
1479 /* Make R_OP1 to hold reduction variable. */
1480 if (r_op2 == PHI_RESULT (header_phi)
1481 && reduction_op == PLUS_EXPR)
1482 std::swap (r_op1, r_op2);
1483 else if (r_op1 != PHI_RESULT (header_phi))
1484 return false;
1486 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1487 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1489 gimple *use_stmt = USE_STMT (use_p);
1490 if (is_gimple_debug (use_stmt))
1491 continue;
1492 if (use_stmt == stmt)
1493 continue;
1494 if (gimple_code (use_stmt) != GIMPLE_PHI)
1495 return false;
1498 *op0 = r_op1; *op1 = r_op2;
1499 *reduc = stmt;
1500 return true;
1503 /* Converts conditional scalar reduction into unconditional form, e.g.
1504 bb_4
1505 if (_5 != 0) goto bb_5 else goto bb_6
1506 end_bb_4
1507 bb_5
1508 res_6 = res_13 + 1;
1509 end_bb_5
1510 bb_6
1511 # res_2 = PHI <res_13(4), res_6(5)>
1512 end_bb_6
1514 will be converted into sequence
1515 _ifc__1 = _5 != 0 ? 1 : 0;
1516 res_2 = res_13 + _ifc__1;
1517 Argument SWAP tells that arguments of conditional expression should be
1518 swapped.
1519 Returns rhs of resulting PHI assignment. */
1521 static tree
1522 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1523 tree cond, tree op0, tree op1, bool swap)
1525 gimple_stmt_iterator stmt_it;
1526 gimple *new_assign;
1527 tree rhs;
1528 tree rhs1 = gimple_assign_rhs1 (reduc);
1529 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1530 tree c;
1531 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1533 if (dump_file && (dump_flags & TDF_DETAILS))
1535 fprintf (dump_file, "Found cond scalar reduction.\n");
1536 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1539 /* Build cond expression using COND and constant operand
1540 of reduction rhs. */
1541 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1542 unshare_expr (cond),
1543 swap ? zero : op1,
1544 swap ? op1 : zero);
1546 /* Create assignment stmt and insert it at GSI. */
1547 new_assign = gimple_build_assign (tmp, c);
1548 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1549 /* Build rhs for unconditional increment/decrement. */
1550 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1551 TREE_TYPE (rhs1), op0, tmp);
1553 /* Delete original reduction stmt. */
1554 stmt_it = gsi_for_stmt (reduc);
1555 gsi_remove (&stmt_it, true);
1556 release_defs (reduc);
1557 return rhs;
1560 /* Produce condition for all occurrences of ARG in PHI node. */
1562 static tree
1563 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1564 gimple_stmt_iterator *gsi)
1566 int len;
1567 int i;
1568 tree cond = NULL_TREE;
1569 tree c;
1570 edge e;
1572 len = occur->length ();
1573 gcc_assert (len > 0);
1574 for (i = 0; i < len; i++)
1576 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1577 c = bb_predicate (e->src);
1578 if (is_true_predicate (c))
1579 continue;
1580 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1581 is_gimple_condexpr, NULL_TREE,
1582 true, GSI_SAME_STMT);
1583 if (cond != NULL_TREE)
1585 /* Must build OR expression. */
1586 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1587 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1588 is_gimple_condexpr, NULL_TREE,
1589 true, GSI_SAME_STMT);
1591 else
1592 cond = c;
1594 gcc_assert (cond != NULL_TREE);
1595 return cond;
1598 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1599 This routine can handle PHI nodes with more than two arguments.
1601 For example,
1602 S1: A = PHI <x1(1), x2(5)>
1603 is converted into,
1604 S2: A = cond ? x1 : x2;
1606 The generated code is inserted at GSI that points to the top of
1607 basic block's statement list.
1608 If PHI node has more than two arguments a chain of conditional
1609 expression is produced. */
1612 static void
1613 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1615 gimple *new_stmt = NULL, *reduc;
1616 tree rhs, res, arg0, arg1, op0, op1, scev;
1617 tree cond;
1618 unsigned int index0;
1619 unsigned int max, args_len;
1620 edge e;
1621 basic_block bb;
1622 unsigned int i;
1624 res = gimple_phi_result (phi);
1625 if (virtual_operand_p (res))
1626 return;
1628 if ((rhs = degenerate_phi_result (phi))
1629 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1630 res))
1631 && !chrec_contains_undetermined (scev)
1632 && scev != res
1633 && (rhs = gimple_phi_arg_def (phi, 0))))
1635 if (dump_file && (dump_flags & TDF_DETAILS))
1637 fprintf (dump_file, "Degenerate phi!\n");
1638 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1640 new_stmt = gimple_build_assign (res, rhs);
1641 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1642 update_stmt (new_stmt);
1643 return;
1646 bb = gimple_bb (phi);
1647 if (EDGE_COUNT (bb->preds) == 2)
1649 /* Predicate ordinary PHI node with 2 arguments. */
1650 edge first_edge, second_edge;
1651 basic_block true_bb;
1652 first_edge = EDGE_PRED (bb, 0);
1653 second_edge = EDGE_PRED (bb, 1);
1654 cond = bb_predicate (first_edge->src);
1655 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1656 std::swap (first_edge, second_edge);
1657 if (EDGE_COUNT (first_edge->src->succs) > 1)
1659 cond = bb_predicate (second_edge->src);
1660 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1661 cond = TREE_OPERAND (cond, 0);
1662 else
1663 first_edge = second_edge;
1665 else
1666 cond = bb_predicate (first_edge->src);
1667 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1668 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1669 is_gimple_condexpr, NULL_TREE,
1670 true, GSI_SAME_STMT);
1671 true_bb = first_edge->src;
1672 if (EDGE_PRED (bb, 1)->src == true_bb)
1674 arg0 = gimple_phi_arg_def (phi, 1);
1675 arg1 = gimple_phi_arg_def (phi, 0);
1677 else
1679 arg0 = gimple_phi_arg_def (phi, 0);
1680 arg1 = gimple_phi_arg_def (phi, 1);
1682 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1683 &op0, &op1, false))
1684 /* Convert reduction stmt into vectorizable form. */
1685 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1686 true_bb != gimple_bb (reduc));
1687 else
1688 /* Build new RHS using selected condition and arguments. */
1689 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1690 arg0, arg1);
1691 new_stmt = gimple_build_assign (res, rhs);
1692 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1693 update_stmt (new_stmt);
1695 if (dump_file && (dump_flags & TDF_DETAILS))
1697 fprintf (dump_file, "new phi replacement stmt\n");
1698 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1700 return;
1703 /* Create hashmap for PHI node which contain vector of argument indexes
1704 having the same value. */
1705 bool swap = false;
1706 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1707 unsigned int num_args = gimple_phi_num_args (phi);
1708 int max_ind = -1;
1709 /* Vector of different PHI argument values. */
1710 auto_vec<tree> args (num_args);
1712 /* Compute phi_arg_map. */
1713 for (i = 0; i < num_args; i++)
1715 tree arg;
1717 arg = gimple_phi_arg_def (phi, i);
1718 if (!phi_arg_map.get (arg))
1719 args.quick_push (arg);
1720 phi_arg_map.get_or_insert (arg).safe_push (i);
1723 /* Determine element with max number of occurrences. */
1724 max_ind = -1;
1725 max = 1;
1726 args_len = args.length ();
1727 for (i = 0; i < args_len; i++)
1729 unsigned int len;
1730 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1732 max_ind = (int) i;
1733 max = len;
1737 /* Put element with max number of occurences to the end of ARGS. */
1738 if (max_ind != -1 && max_ind +1 != (int) args_len)
1739 std::swap (args[args_len - 1], args[max_ind]);
1741 /* Handle one special case when number of arguments with different values
1742 is equal 2 and one argument has the only occurrence. Such PHI can be
1743 handled as if would have only 2 arguments. */
1744 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1746 vec<int> *indexes;
1747 indexes = phi_arg_map.get (args[0]);
1748 index0 = (*indexes)[0];
1749 arg0 = args[0];
1750 arg1 = args[1];
1751 e = gimple_phi_arg_edge (phi, index0);
1752 cond = bb_predicate (e->src);
1753 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1755 swap = true;
1756 cond = TREE_OPERAND (cond, 0);
1758 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1759 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1760 is_gimple_condexpr, NULL_TREE,
1761 true, GSI_SAME_STMT);
1762 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1763 &op0, &op1, true)))
1764 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1765 swap? arg1 : arg0,
1766 swap? arg0 : arg1);
1767 else
1768 /* Convert reduction stmt into vectorizable form. */
1769 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1770 swap);
1771 new_stmt = gimple_build_assign (res, rhs);
1772 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1773 update_stmt (new_stmt);
1775 else
1777 /* Common case. */
1778 vec<int> *indexes;
1779 tree type = TREE_TYPE (gimple_phi_result (phi));
1780 tree lhs;
1781 arg1 = args[1];
1782 for (i = 0; i < args_len; i++)
1784 arg0 = args[i];
1785 indexes = phi_arg_map.get (args[i]);
1786 if (i != args_len - 1)
1787 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1788 else
1789 lhs = res;
1790 cond = gen_phi_arg_condition (phi, indexes, gsi);
1791 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1792 arg0, arg1);
1793 new_stmt = gimple_build_assign (lhs, rhs);
1794 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1795 update_stmt (new_stmt);
1796 arg1 = lhs;
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, "new extended phi replacement stmt\n");
1803 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1807 /* Replaces in LOOP all the scalar phi nodes other than those in the
1808 LOOP->header block with conditional modify expressions. */
1810 static void
1811 predicate_all_scalar_phis (struct loop *loop)
1813 basic_block bb;
1814 unsigned int orig_loop_num_nodes = loop->num_nodes;
1815 unsigned int i;
1817 for (i = 1; i < orig_loop_num_nodes; i++)
1819 gphi *phi;
1820 gimple_stmt_iterator gsi;
1821 gphi_iterator phi_gsi;
1822 bb = ifc_bbs[i];
1824 if (bb == loop->header)
1825 continue;
1827 if (EDGE_COUNT (bb->preds) == 1)
1828 continue;
1830 phi_gsi = gsi_start_phis (bb);
1831 if (gsi_end_p (phi_gsi))
1832 continue;
1834 gsi = gsi_after_labels (bb);
1835 while (!gsi_end_p (phi_gsi))
1837 phi = phi_gsi.phi ();
1838 predicate_scalar_phi (phi, &gsi);
1839 release_phi_node (phi);
1840 gsi_next (&phi_gsi);
1843 set_phi_nodes (bb, NULL);
1847 /* Insert in each basic block of LOOP the statements produced by the
1848 gimplification of the predicates. */
1850 static void
1851 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1853 unsigned int i;
1855 for (i = 0; i < loop->num_nodes; i++)
1857 basic_block bb = ifc_bbs[i];
1858 gimple_seq stmts;
1859 if (!is_predicated (bb))
1860 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
1861 if (!is_predicated (bb))
1863 /* Do not insert statements for a basic block that is not
1864 predicated. Also make sure that the predicate of the
1865 basic block is set to true. */
1866 reset_bb_predicate (bb);
1867 continue;
1870 stmts = bb_predicate_gimplified_stmts (bb);
1871 if (stmts)
1873 if (flag_tree_loop_if_convert_stores
1874 || any_mask_load_store)
1876 /* Insert the predicate of the BB just after the label,
1877 as the if-conversion of memory writes will use this
1878 predicate. */
1879 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1880 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1882 else
1884 /* Insert the predicate of the BB at the end of the BB
1885 as this would reduce the register pressure: the only
1886 use of this predicate will be in successor BBs. */
1887 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1889 if (gsi_end_p (gsi)
1890 || stmt_ends_bb_p (gsi_stmt (gsi)))
1891 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1892 else
1893 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1896 /* Once the sequence is code generated, set it to NULL. */
1897 set_bb_predicate_gimplified_stmts (bb, NULL);
1902 /* Helper function for predicate_mem_writes. Returns index of existent
1903 mask if it was created for given SIZE and -1 otherwise. */
1905 static int
1906 mask_exists (int size, vec<int> vec)
1908 unsigned int ix;
1909 int v;
1910 FOR_EACH_VEC_ELT (vec, ix, v)
1911 if (v == size)
1912 return (int) ix;
1913 return -1;
1916 /* Predicate each write to memory in LOOP.
1918 This function transforms control flow constructs containing memory
1919 writes of the form:
1921 | for (i = 0; i < N; i++)
1922 | if (cond)
1923 | A[i] = expr;
1925 into the following form that does not contain control flow:
1927 | for (i = 0; i < N; i++)
1928 | A[i] = cond ? expr : A[i];
1930 The original CFG looks like this:
1932 | bb_0
1933 | i = 0
1934 | end_bb_0
1936 | bb_1
1937 | if (i < N) goto bb_5 else goto bb_2
1938 | end_bb_1
1940 | bb_2
1941 | cond = some_computation;
1942 | if (cond) goto bb_3 else goto bb_4
1943 | end_bb_2
1945 | bb_3
1946 | A[i] = expr;
1947 | goto bb_4
1948 | end_bb_3
1950 | bb_4
1951 | goto bb_1
1952 | end_bb_4
1954 insert_gimplified_predicates inserts the computation of the COND
1955 expression at the beginning of the destination basic block:
1957 | bb_0
1958 | i = 0
1959 | end_bb_0
1961 | bb_1
1962 | if (i < N) goto bb_5 else goto bb_2
1963 | end_bb_1
1965 | bb_2
1966 | cond = some_computation;
1967 | if (cond) goto bb_3 else goto bb_4
1968 | end_bb_2
1970 | bb_3
1971 | cond = some_computation;
1972 | A[i] = expr;
1973 | goto bb_4
1974 | end_bb_3
1976 | bb_4
1977 | goto bb_1
1978 | end_bb_4
1980 predicate_mem_writes is then predicating the memory write as follows:
1982 | bb_0
1983 | i = 0
1984 | end_bb_0
1986 | bb_1
1987 | if (i < N) goto bb_5 else goto bb_2
1988 | end_bb_1
1990 | bb_2
1991 | if (cond) goto bb_3 else goto bb_4
1992 | end_bb_2
1994 | bb_3
1995 | cond = some_computation;
1996 | A[i] = cond ? expr : A[i];
1997 | goto bb_4
1998 | end_bb_3
2000 | bb_4
2001 | goto bb_1
2002 | end_bb_4
2004 and finally combine_blocks removes the basic block boundaries making
2005 the loop vectorizable:
2007 | bb_0
2008 | i = 0
2009 | if (i < N) goto bb_5 else goto bb_1
2010 | end_bb_0
2012 | bb_1
2013 | cond = some_computation;
2014 | A[i] = cond ? expr : A[i];
2015 | if (i < N) goto bb_5 else goto bb_4
2016 | end_bb_1
2018 | bb_4
2019 | goto bb_1
2020 | end_bb_4
2023 static void
2024 predicate_mem_writes (loop_p loop)
2026 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2027 auto_vec<int, 1> vect_sizes;
2028 auto_vec<tree, 1> vect_masks;
2030 for (i = 1; i < orig_loop_num_nodes; i++)
2032 gimple_stmt_iterator gsi;
2033 basic_block bb = ifc_bbs[i];
2034 tree cond = bb_predicate (bb);
2035 bool swap;
2036 gimple *stmt;
2037 int index;
2039 if (is_true_predicate (cond))
2040 continue;
2042 swap = false;
2043 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2045 swap = true;
2046 cond = TREE_OPERAND (cond, 0);
2049 vect_sizes.truncate (0);
2050 vect_masks.truncate (0);
2052 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2053 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2054 continue;
2055 else if (gimple_plf (stmt, GF_PLF_2))
2057 tree lhs = gimple_assign_lhs (stmt);
2058 tree rhs = gimple_assign_rhs1 (stmt);
2059 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
2060 gimple *new_stmt;
2061 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
2062 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2063 mark_addressable (ref);
2064 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2065 true, NULL_TREE, true,
2066 GSI_SAME_STMT);
2067 if (!vect_sizes.is_empty ()
2068 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2069 /* Use created mask. */
2070 mask = vect_masks[index];
2071 else
2073 masktype = build_nonstandard_integer_type (bitsize, 1);
2074 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
2075 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
2076 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2077 is_gimple_condexpr,
2078 NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
2081 mask_op0, mask_op1);
2082 mask = ifc_temp_var (masktype, mask, &gsi);
2083 /* Save mask and its size for further use. */
2084 vect_sizes.safe_push (bitsize);
2085 vect_masks.safe_push (mask);
2087 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
2088 /* Copy points-to info if possible. */
2089 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2090 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2091 ref);
2092 if (TREE_CODE (lhs) == SSA_NAME)
2094 new_stmt
2095 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2096 ptr, mask);
2097 gimple_call_set_lhs (new_stmt, lhs);
2099 else
2100 new_stmt
2101 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2102 mask, rhs);
2103 gsi_replace (&gsi, new_stmt, true);
2105 else if (gimple_vdef (stmt))
2107 tree lhs = gimple_assign_lhs (stmt);
2108 tree rhs = gimple_assign_rhs1 (stmt);
2109 tree type = TREE_TYPE (lhs);
2111 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2112 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2113 if (swap)
2114 std::swap (lhs, rhs);
2115 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2116 is_gimple_condexpr, NULL_TREE,
2117 true, GSI_SAME_STMT);
2118 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2119 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2120 update_stmt (stmt);
2125 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2126 other than the exit and latch of the LOOP. Also resets the
2127 GIMPLE_DEBUG information. */
2129 static void
2130 remove_conditions_and_labels (loop_p loop)
2132 gimple_stmt_iterator gsi;
2133 unsigned int i;
2135 for (i = 0; i < loop->num_nodes; i++)
2137 basic_block bb = ifc_bbs[i];
2139 if (bb_with_exit_edge_p (loop, bb)
2140 || bb == loop->latch)
2141 continue;
2143 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2144 switch (gimple_code (gsi_stmt (gsi)))
2146 case GIMPLE_COND:
2147 case GIMPLE_LABEL:
2148 gsi_remove (&gsi, true);
2149 break;
2151 case GIMPLE_DEBUG:
2152 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2153 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2155 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2156 update_stmt (gsi_stmt (gsi));
2158 gsi_next (&gsi);
2159 break;
2161 default:
2162 gsi_next (&gsi);
2167 /* Combine all the basic blocks from LOOP into one or two super basic
2168 blocks. Replace PHI nodes with conditional modify expressions. */
2170 static void
2171 combine_blocks (struct loop *loop, bool any_mask_load_store)
2173 basic_block bb, exit_bb, merge_target_bb;
2174 unsigned int orig_loop_num_nodes = loop->num_nodes;
2175 unsigned int i;
2176 edge e;
2177 edge_iterator ei;
2179 predicate_bbs (loop);
2180 remove_conditions_and_labels (loop);
2181 insert_gimplified_predicates (loop, any_mask_load_store);
2182 predicate_all_scalar_phis (loop);
2184 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2185 predicate_mem_writes (loop);
2187 /* Merge basic blocks: first remove all the edges in the loop,
2188 except for those from the exit block. */
2189 exit_bb = NULL;
2190 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2191 for (i = 0; i < orig_loop_num_nodes; i++)
2193 bb = ifc_bbs[i];
2194 predicated[i] = !is_true_predicate (bb_predicate (bb));
2195 free_bb_predicate (bb);
2196 if (bb_with_exit_edge_p (loop, bb))
2198 gcc_assert (exit_bb == NULL);
2199 exit_bb = bb;
2202 gcc_assert (exit_bb != loop->latch);
2204 for (i = 1; i < orig_loop_num_nodes; i++)
2206 bb = ifc_bbs[i];
2208 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2210 if (e->src == exit_bb)
2211 ei_next (&ei);
2212 else
2213 remove_edge (e);
2217 if (exit_bb != NULL)
2219 if (exit_bb != loop->header)
2221 /* Connect this node to loop header. */
2222 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2223 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2226 /* Redirect non-exit edges to loop->latch. */
2227 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2229 if (!loop_exit_edge_p (loop, e))
2230 redirect_edge_and_branch (e, loop->latch);
2232 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2234 else
2236 /* If the loop does not have an exit, reconnect header and latch. */
2237 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2238 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2241 merge_target_bb = loop->header;
2242 for (i = 1; i < orig_loop_num_nodes; i++)
2244 gimple_stmt_iterator gsi;
2245 gimple_stmt_iterator last;
2247 bb = ifc_bbs[i];
2249 if (bb == exit_bb || bb == loop->latch)
2250 continue;
2252 /* Make stmts member of loop->header and clear range info from all stmts
2253 in BB which is now no longer executed conditional on a predicate we
2254 could have derived it from. */
2255 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2257 gimple *stmt = gsi_stmt (gsi);
2258 gimple_set_bb (stmt, merge_target_bb);
2259 if (predicated[i])
2261 ssa_op_iter i;
2262 tree op;
2263 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2264 reset_flow_sensitive_info (op);
2268 /* Update stmt list. */
2269 last = gsi_last_bb (merge_target_bb);
2270 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2271 set_bb_seq (bb, NULL);
2273 delete_basic_block (bb);
2276 /* If possible, merge loop header to the block with the exit edge.
2277 This reduces the number of basic blocks to two, to please the
2278 vectorizer that handles only loops with two nodes. */
2279 if (exit_bb
2280 && exit_bb != loop->header
2281 && can_merge_blocks_p (loop->header, exit_bb))
2282 merge_blocks (loop->header, exit_bb);
2284 free (ifc_bbs);
2285 ifc_bbs = NULL;
2286 free (predicated);
2289 /* Version LOOP before if-converting it; the original loop
2290 will be if-converted, the new copy of the loop will not,
2291 and the LOOP_VECTORIZED internal call will be guarding which
2292 loop to execute. The vectorizer pass will fold this
2293 internal call into either true or false. */
2295 static bool
2296 version_loop_for_if_conversion (struct loop *loop)
2298 basic_block cond_bb;
2299 tree cond = make_ssa_name (boolean_type_node);
2300 struct loop *new_loop;
2301 gimple *g;
2302 gimple_stmt_iterator gsi;
2304 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2305 build_int_cst (integer_type_node, loop->num),
2306 integer_zero_node);
2307 gimple_call_set_lhs (g, cond);
2309 initialize_original_copy_tables ();
2310 new_loop = loop_version (loop, cond, &cond_bb,
2311 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2312 REG_BR_PROB_BASE, true);
2313 free_original_copy_tables ();
2314 if (new_loop == NULL)
2315 return false;
2316 new_loop->dont_vectorize = true;
2317 new_loop->force_vectorize = false;
2318 gsi = gsi_last_bb (cond_bb);
2319 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2320 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2321 update_ssa (TODO_update_ssa);
2322 return true;
2325 /* Performs splitting of critical edges if aggressive_if_conv is true.
2326 Returns false if loop won't be if converted and true otherwise. */
2328 static bool
2329 ifcvt_split_critical_edges (struct loop *loop)
2331 basic_block *body;
2332 basic_block bb;
2333 unsigned int num = loop->num_nodes;
2334 unsigned int i;
2335 gimple *stmt;
2336 edge e;
2337 edge_iterator ei;
2339 if (num <= 2)
2340 return false;
2341 if (loop->inner)
2342 return false;
2343 if (!single_exit (loop))
2344 return false;
2346 body = get_loop_body (loop);
2347 for (i = 0; i < num; i++)
2349 bb = body[i];
2350 if (bb == loop->latch
2351 || bb_with_exit_edge_p (loop, bb))
2352 continue;
2353 stmt = last_stmt (bb);
2354 /* Skip basic blocks not ending with conditional branch. */
2355 if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
2356 continue;
2357 FOR_EACH_EDGE (e, ei, bb->succs)
2358 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2359 split_edge (e);
2361 free (body);
2362 return true;
2365 /* Assumes that lhs of DEF_STMT have multiple uses.
2366 Delete one use by (1) creation of copy DEF_STMT with
2367 unique lhs; (2) change original use of lhs in one
2368 use statement with newly created lhs. */
2370 static void
2371 ifcvt_split_def_stmt (gimple *def_stmt, gimple *use_stmt)
2373 tree var;
2374 tree lhs;
2375 gimple *copy_stmt;
2376 gimple_stmt_iterator gsi;
2377 use_operand_p use_p;
2378 imm_use_iterator imm_iter;
2380 var = gimple_assign_lhs (def_stmt);
2381 copy_stmt = gimple_copy (def_stmt);
2382 lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
2383 gimple_assign_set_lhs (copy_stmt, lhs);
2384 SSA_NAME_DEF_STMT (lhs) = copy_stmt;
2385 /* Insert copy of DEF_STMT. */
2386 gsi = gsi_for_stmt (def_stmt);
2387 gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
2388 /* Change use of var to lhs in use_stmt. */
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2391 fprintf (dump_file, "Change use of var ");
2392 print_generic_expr (dump_file, var, TDF_SLIM);
2393 fprintf (dump_file, " to ");
2394 print_generic_expr (dump_file, lhs, TDF_SLIM);
2395 fprintf (dump_file, "\n");
2397 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2399 if (USE_STMT (use_p) != use_stmt)
2400 continue;
2401 SET_USE (use_p, lhs);
2402 break;
2406 /* Traverse bool pattern recursively starting from VAR.
2407 Save its def and use statements to defuse_list if VAR does
2408 not have single use. */
2410 static void
2411 ifcvt_walk_pattern_tree (tree var, vec<gimple *> *defuse_list,
2412 gimple *use_stmt)
2414 tree rhs1, rhs2;
2415 enum tree_code code;
2416 gimple *def_stmt;
2418 def_stmt = SSA_NAME_DEF_STMT (var);
2419 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2420 return;
2421 if (!has_single_use (var))
2423 /* Put def and use stmts into defuse_list. */
2424 defuse_list->safe_push (def_stmt);
2425 defuse_list->safe_push (use_stmt);
2426 if (dump_file && (dump_flags & TDF_DETAILS))
2428 fprintf (dump_file, "Multiple lhs uses in stmt\n");
2429 print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
2432 rhs1 = gimple_assign_rhs1 (def_stmt);
2433 code = gimple_assign_rhs_code (def_stmt);
2434 switch (code)
2436 case SSA_NAME:
2437 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2438 break;
2439 CASE_CONVERT:
2440 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2441 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2442 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2443 break;
2444 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2445 break;
2446 case BIT_NOT_EXPR:
2447 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2448 break;
2449 case BIT_AND_EXPR:
2450 case BIT_IOR_EXPR:
2451 case BIT_XOR_EXPR:
2452 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2453 rhs2 = gimple_assign_rhs2 (def_stmt);
2454 ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
2455 break;
2456 default:
2457 break;
2459 return;
2462 /* Returns true if STMT can be a root of bool pattern applied
2463 by vectorizer. */
2465 static bool
2466 stmt_is_root_of_bool_pattern (gimple *stmt)
2468 enum tree_code code;
2469 tree lhs, rhs;
2471 code = gimple_assign_rhs_code (stmt);
2472 if (CONVERT_EXPR_CODE_P (code))
2474 lhs = gimple_assign_lhs (stmt);
2475 rhs = gimple_assign_rhs1 (stmt);
2476 if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2477 return false;
2478 if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
2479 return false;
2480 return true;
2482 else if (code == COND_EXPR)
2484 rhs = gimple_assign_rhs1 (stmt);
2485 if (TREE_CODE (rhs) != SSA_NAME)
2486 return false;
2487 return true;
2489 return false;
2492 /* Traverse all statements in BB which correspond to loop header to
2493 find out all statements which can start bool pattern applied by
2494 vectorizer and convert multiple uses in it to conform pattern
2495 restrictions. Such case can occur if the same predicate is used both
2496 for phi node conversion and load/store mask. */
2498 static void
2499 ifcvt_repair_bool_pattern (basic_block bb)
2501 tree rhs;
2502 gimple *stmt;
2503 gimple_stmt_iterator gsi;
2504 vec<gimple *> defuse_list = vNULL;
2505 vec<gimple *> pattern_roots = vNULL;
2506 bool repeat = true;
2507 int niter = 0;
2508 unsigned int ix;
2510 /* Collect all root pattern statements. */
2511 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2513 stmt = gsi_stmt (gsi);
2514 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2515 continue;
2516 if (!stmt_is_root_of_bool_pattern (stmt))
2517 continue;
2518 pattern_roots.safe_push (stmt);
2521 if (pattern_roots.is_empty ())
2522 return;
2524 /* Split all statements with multiple uses iteratively since splitting
2525 may create new multiple uses. */
2526 while (repeat)
2528 repeat = false;
2529 niter++;
2530 FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
2532 rhs = gimple_assign_rhs1 (stmt);
2533 ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
2534 while (defuse_list.length () > 0)
2536 repeat = true;
2537 gimple *def_stmt, *use_stmt;
2538 use_stmt = defuse_list.pop ();
2539 def_stmt = defuse_list.pop ();
2540 ifcvt_split_def_stmt (def_stmt, use_stmt);
2545 if (dump_file && (dump_flags & TDF_DETAILS))
2546 fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
2547 niter);
2550 /* Delete redundant statements produced by predication which prevents
2551 loop vectorization. */
2553 static void
2554 ifcvt_local_dce (basic_block bb)
2556 gimple *stmt;
2557 gimple *stmt1;
2558 gimple *phi;
2559 gimple_stmt_iterator gsi;
2560 vec<gimple *> worklist;
2561 enum gimple_code code;
2562 use_operand_p use_p;
2563 imm_use_iterator imm_iter;
2565 worklist.create (64);
2566 /* Consider all phi as live statements. */
2567 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2569 phi = gsi_stmt (gsi);
2570 gimple_set_plf (phi, GF_PLF_2, true);
2571 worklist.safe_push (phi);
2573 /* Consider load/store statements, CALL and COND as live. */
2574 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2576 stmt = gsi_stmt (gsi);
2577 if (gimple_store_p (stmt)
2578 || gimple_assign_load_p (stmt)
2579 || is_gimple_debug (stmt))
2581 gimple_set_plf (stmt, GF_PLF_2, true);
2582 worklist.safe_push (stmt);
2583 continue;
2585 code = gimple_code (stmt);
2586 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2588 gimple_set_plf (stmt, GF_PLF_2, true);
2589 worklist.safe_push (stmt);
2590 continue;
2592 gimple_set_plf (stmt, GF_PLF_2, false);
2594 if (code == GIMPLE_ASSIGN)
2596 tree lhs = gimple_assign_lhs (stmt);
2597 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2599 stmt1 = USE_STMT (use_p);
2600 if (gimple_bb (stmt1) != bb)
2602 gimple_set_plf (stmt, GF_PLF_2, true);
2603 worklist.safe_push (stmt);
2604 break;
2609 /* Propagate liveness through arguments of live stmt. */
2610 while (worklist.length () > 0)
2612 ssa_op_iter iter;
2613 use_operand_p use_p;
2614 tree use;
2616 stmt = worklist.pop ();
2617 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2619 use = USE_FROM_PTR (use_p);
2620 if (TREE_CODE (use) != SSA_NAME)
2621 continue;
2622 stmt1 = SSA_NAME_DEF_STMT (use);
2623 if (gimple_bb (stmt1) != bb
2624 || gimple_plf (stmt1, GF_PLF_2))
2625 continue;
2626 gimple_set_plf (stmt1, GF_PLF_2, true);
2627 worklist.safe_push (stmt1);
2630 /* Delete dead statements. */
2631 gsi = gsi_start_bb (bb);
2632 while (!gsi_end_p (gsi))
2634 stmt = gsi_stmt (gsi);
2635 if (gimple_plf (stmt, GF_PLF_2))
2637 gsi_next (&gsi);
2638 continue;
2640 if (dump_file && (dump_flags & TDF_DETAILS))
2642 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2643 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2645 gsi_remove (&gsi, true);
2646 release_defs (stmt);
2650 /* If-convert LOOP when it is legal. For the moment this pass has no
2651 profitability analysis. Returns non-zero todo flags when something
2652 changed. */
2654 static unsigned int
2655 tree_if_conversion (struct loop *loop)
2657 unsigned int todo = 0;
2658 ifc_bbs = NULL;
2659 bool any_mask_load_store = false;
2661 /* Set up aggressive if-conversion for loops marked with simd pragma. */
2662 aggressive_if_conv = loop->force_vectorize;
2663 /* Check either outer loop was marked with simd pragma. */
2664 if (!aggressive_if_conv)
2666 struct loop *outer_loop = loop_outer (loop);
2667 if (outer_loop && outer_loop->force_vectorize)
2668 aggressive_if_conv = true;
2671 if (aggressive_if_conv)
2672 if (!ifcvt_split_critical_edges (loop))
2673 goto cleanup;
2675 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2676 || !dbg_cnt (if_conversion_tree))
2677 goto cleanup;
2679 if (any_mask_load_store
2680 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2681 || loop->dont_vectorize))
2682 goto cleanup;
2684 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2685 goto cleanup;
2687 /* Now all statements are if-convertible. Combine all the basic
2688 blocks into one huge basic block doing the if-conversion
2689 on-the-fly. */
2690 combine_blocks (loop, any_mask_load_store);
2692 /* Delete dead predicate computations and repair tree correspondent
2693 to bool pattern to delete multiple uses of predicates. */
2694 if (aggressive_if_conv)
2696 ifcvt_local_dce (loop->header);
2697 ifcvt_repair_bool_pattern (loop->header);
2700 todo |= TODO_cleanup_cfg;
2701 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2703 mark_virtual_operands_for_renaming (cfun);
2704 todo |= TODO_update_ssa_only_virtuals;
2707 cleanup:
2708 if (ifc_bbs)
2710 unsigned int i;
2712 for (i = 0; i < loop->num_nodes; i++)
2713 free_bb_predicate (ifc_bbs[i]);
2715 free (ifc_bbs);
2716 ifc_bbs = NULL;
2718 free_dominance_info (CDI_POST_DOMINATORS);
2720 return todo;
2723 /* Tree if-conversion pass management. */
2725 namespace {
2727 const pass_data pass_data_if_conversion =
2729 GIMPLE_PASS, /* type */
2730 "ifcvt", /* name */
2731 OPTGROUP_NONE, /* optinfo_flags */
2732 TV_NONE, /* tv_id */
2733 ( PROP_cfg | PROP_ssa ), /* properties_required */
2734 0, /* properties_provided */
2735 0, /* properties_destroyed */
2736 0, /* todo_flags_start */
2737 0, /* todo_flags_finish */
2740 class pass_if_conversion : public gimple_opt_pass
2742 public:
2743 pass_if_conversion (gcc::context *ctxt)
2744 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2747 /* opt_pass methods: */
2748 virtual bool gate (function *);
2749 virtual unsigned int execute (function *);
2751 }; // class pass_if_conversion
2753 bool
2754 pass_if_conversion::gate (function *fun)
2756 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2757 && flag_tree_loop_if_convert != 0)
2758 || flag_tree_loop_if_convert == 1
2759 || flag_tree_loop_if_convert_stores == 1);
2762 unsigned int
2763 pass_if_conversion::execute (function *fun)
2765 struct loop *loop;
2766 unsigned todo = 0;
2768 if (number_of_loops (fun) <= 1)
2769 return 0;
2771 FOR_EACH_LOOP (loop, 0)
2772 if (flag_tree_loop_if_convert == 1
2773 || flag_tree_loop_if_convert_stores == 1
2774 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2775 && !loop->dont_vectorize))
2776 todo |= tree_if_conversion (loop);
2778 if (flag_checking)
2780 basic_block bb;
2781 FOR_EACH_BB_FN (bb, fun)
2782 gcc_assert (!bb->aux);
2785 return todo;
2788 } // anon namespace
2790 gimple_opt_pass *
2791 make_pass_if_conversion (gcc::context *ctxt)
2793 return new pass_if_conversion (ctxt);