* config/i386/winnt.c (i386_pe_encode_section_info) <FUNCTION_DECL>:
[official-gcc.git] / gcc / tree-if-conv.c
blob28e1c475c5f754a02dd0c1e2efad9f6fc1cb03b4
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "tm.h"
87 #include "hash-set.h"
88 #include "machmode.h"
89 #include "vec.h"
90 #include "double-int.h"
91 #include "input.h"
92 #include "alias.h"
93 #include "symtab.h"
94 #include "wide-int.h"
95 #include "inchash.h"
96 #include "tree.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "flags.h"
100 #include "predict.h"
101 #include "hard-reg-set.h"
102 #include "function.h"
103 #include "dominance.h"
104 #include "cfg.h"
105 #include "basic-block.h"
106 #include "gimple-pretty-print.h"
107 #include "tree-ssa-alias.h"
108 #include "internal-fn.h"
109 #include "gimple-fold.h"
110 #include "gimple-expr.h"
111 #include "is-a.h"
112 #include "gimple.h"
113 #include "gimplify.h"
114 #include "gimple-iterator.h"
115 #include "gimplify-me.h"
116 #include "gimple-ssa.h"
117 #include "tree-cfg.h"
118 #include "tree-phinodes.h"
119 #include "ssa-iterators.h"
120 #include "stringpool.h"
121 #include "tree-ssanames.h"
122 #include "tree-into-ssa.h"
123 #include "tree-ssa.h"
124 #include "cfgloop.h"
125 #include "tree-chrec.h"
126 #include "tree-data-ref.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-ssa-loop-ivopts.h"
129 #include "tree-ssa-address.h"
130 #include "tree-pass.h"
131 #include "dbgcnt.h"
132 #include "hashtab.h"
133 #include "rtl.h"
134 #include "statistics.h"
135 #include "real.h"
136 #include "fixed-value.h"
137 #include "insn-config.h"
138 #include "expmed.h"
139 #include "dojump.h"
140 #include "explow.h"
141 #include "calls.h"
142 #include "emit-rtl.h"
143 #include "varasm.h"
144 #include "stmt.h"
145 #include "expr.h"
146 #include "insn-codes.h"
147 #include "optabs.h"
148 #include "hash-map.h"
150 /* List of basic blocks in if-conversion-suitable order. */
151 static basic_block *ifc_bbs;
153 /* Apply more aggressive (extended) if-conversion if true. */
154 static bool aggressive_if_conv;
156 /* Structure used to predicate basic blocks. This is attached to the
157 ->aux field of the BBs in the loop to be if-converted. */
158 typedef struct bb_predicate_s {
160 /* The condition under which this basic block is executed. */
161 tree predicate;
163 /* PREDICATE is gimplified, and the sequence of statements is
164 recorded here, in order to avoid the duplication of computations
165 that occur in previous conditions. See PR44483. */
166 gimple_seq predicate_gimplified_stmts;
167 } *bb_predicate_p;
169 /* Returns true when the basic block BB has a predicate. */
171 static inline bool
172 bb_has_predicate (basic_block bb)
174 return bb->aux != NULL;
177 /* Returns the gimplified predicate for basic block BB. */
179 static inline tree
180 bb_predicate (basic_block bb)
182 return ((bb_predicate_p) bb->aux)->predicate;
185 /* Sets the gimplified predicate COND for basic block BB. */
187 static inline void
188 set_bb_predicate (basic_block bb, tree cond)
190 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
191 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
192 || is_gimple_condexpr (cond));
193 ((bb_predicate_p) bb->aux)->predicate = cond;
196 /* Returns the sequence of statements of the gimplification of the
197 predicate for basic block BB. */
199 static inline gimple_seq
200 bb_predicate_gimplified_stmts (basic_block bb)
202 return ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts;
205 /* Sets the sequence of statements STMTS of the gimplification of the
206 predicate for basic block BB. */
208 static inline void
209 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
211 ((bb_predicate_p) bb->aux)->predicate_gimplified_stmts = stmts;
214 /* Adds the sequence of statements STMTS to the sequence of statements
215 of the predicate for basic block BB. */
217 static inline void
218 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
220 gimple_seq_add_seq
221 (&(((bb_predicate_p) bb->aux)->predicate_gimplified_stmts), stmts);
224 /* Initializes to TRUE the predicate of basic block BB. */
226 static inline void
227 init_bb_predicate (basic_block bb)
229 bb->aux = XNEW (struct bb_predicate_s);
230 set_bb_predicate_gimplified_stmts (bb, NULL);
231 set_bb_predicate (bb, boolean_true_node);
234 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
235 but don't actually free it. */
237 static inline void
238 release_bb_predicate (basic_block bb)
240 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
241 if (stmts)
243 gimple_stmt_iterator i;
245 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
246 free_stmt_operands (cfun, gsi_stmt (i));
247 set_bb_predicate_gimplified_stmts (bb, NULL);
251 /* Free the predicate of basic block BB. */
253 static inline void
254 free_bb_predicate (basic_block bb)
256 if (!bb_has_predicate (bb))
257 return;
259 release_bb_predicate (bb);
260 free (bb->aux);
261 bb->aux = NULL;
264 /* Reinitialize predicate of BB with the true predicate. */
266 static inline void
267 reset_bb_predicate (basic_block bb)
269 if (!bb_has_predicate (bb))
270 init_bb_predicate (bb);
271 else
273 release_bb_predicate (bb);
274 set_bb_predicate (bb, boolean_true_node);
278 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
279 the expression EXPR. Inserts the statement created for this
280 computation before GSI and leaves the iterator GSI at the same
281 statement. */
283 static tree
284 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
286 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
287 gimple stmt = gimple_build_assign (new_name, expr);
288 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
289 return new_name;
292 /* Return true when COND is a true predicate. */
294 static inline bool
295 is_true_predicate (tree cond)
297 return (cond == NULL_TREE
298 || cond == boolean_true_node
299 || integer_onep (cond));
302 /* Returns true when BB has a predicate that is not trivial: true or
303 NULL_TREE. */
305 static inline bool
306 is_predicated (basic_block bb)
308 return !is_true_predicate (bb_predicate (bb));
311 /* Parses the predicate COND and returns its comparison code and
312 operands OP0 and OP1. */
314 static enum tree_code
315 parse_predicate (tree cond, tree *op0, tree *op1)
317 gimple s;
319 if (TREE_CODE (cond) == SSA_NAME
320 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
322 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
324 *op0 = gimple_assign_rhs1 (s);
325 *op1 = gimple_assign_rhs2 (s);
326 return gimple_assign_rhs_code (s);
329 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
331 tree op = gimple_assign_rhs1 (s);
332 tree type = TREE_TYPE (op);
333 enum tree_code code = parse_predicate (op, op0, op1);
335 return code == ERROR_MARK ? ERROR_MARK
336 : invert_tree_comparison (code, HONOR_NANS (type));
339 return ERROR_MARK;
342 if (COMPARISON_CLASS_P (cond))
344 *op0 = TREE_OPERAND (cond, 0);
345 *op1 = TREE_OPERAND (cond, 1);
346 return TREE_CODE (cond);
349 return ERROR_MARK;
352 /* Returns the fold of predicate C1 OR C2 at location LOC. */
354 static tree
355 fold_or_predicates (location_t loc, tree c1, tree c2)
357 tree op1a, op1b, op2a, op2b;
358 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
359 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
361 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
363 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
364 code2, op2a, op2b);
365 if (t)
366 return t;
369 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
372 /* Returns true if N is either a constant or a SSA_NAME. */
374 static bool
375 constant_or_ssa_name (tree n)
377 switch (TREE_CODE (n))
379 case SSA_NAME:
380 case INTEGER_CST:
381 case REAL_CST:
382 case COMPLEX_CST:
383 case VECTOR_CST:
384 return true;
385 default:
386 return false;
390 /* Returns either a COND_EXPR or the folded expression if the folded
391 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
392 a constant or a SSA_NAME. */
394 static tree
395 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
397 tree rhs1, lhs1, cond_expr;
399 /* If COND is comparison r != 0 and r has boolean type, convert COND
400 to SSA_NAME to accept by vect bool pattern. */
401 if (TREE_CODE (cond) == NE_EXPR)
403 tree op0 = TREE_OPERAND (cond, 0);
404 tree op1 = TREE_OPERAND (cond, 1);
405 if (TREE_CODE (op0) == SSA_NAME
406 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
407 && (integer_zerop (op1)))
408 cond = op0;
410 cond_expr = fold_ternary (COND_EXPR, type, cond,
411 rhs, lhs);
413 if (cond_expr == NULL_TREE)
414 return build3 (COND_EXPR, type, cond, rhs, lhs);
416 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
418 if (constant_or_ssa_name (cond_expr))
419 return cond_expr;
421 if (TREE_CODE (cond_expr) == ABS_EXPR)
423 rhs1 = TREE_OPERAND (cond_expr, 1);
424 STRIP_USELESS_TYPE_CONVERSION (rhs1);
425 if (constant_or_ssa_name (rhs1))
426 return build1 (ABS_EXPR, type, rhs1);
429 if (TREE_CODE (cond_expr) == MIN_EXPR
430 || TREE_CODE (cond_expr) == MAX_EXPR)
432 lhs1 = TREE_OPERAND (cond_expr, 0);
433 STRIP_USELESS_TYPE_CONVERSION (lhs1);
434 rhs1 = TREE_OPERAND (cond_expr, 1);
435 STRIP_USELESS_TYPE_CONVERSION (rhs1);
436 if (constant_or_ssa_name (rhs1)
437 && constant_or_ssa_name (lhs1))
438 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
440 return build3 (COND_EXPR, type, cond, rhs, lhs);
443 /* Add condition NC to the predicate list of basic block BB. LOOP is
444 the loop to be if-converted. Use predicate of cd-equivalent block
445 for join bb if it exists: we call basic blocks bb1 and bb2
446 cd-equivalent if they are executed under the same condition. */
448 static inline void
449 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
451 tree bc, *tp;
452 basic_block dom_bb;
454 if (is_true_predicate (nc))
455 return;
457 /* If dominance tells us this basic block is always executed,
458 don't record any predicates for it. */
459 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
460 return;
462 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
463 /* We use notion of cd equivalence to get simpler predicate for
464 join block, e.g. if join block has 2 predecessors with predicates
465 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
466 p1 & p2 | p1 & !p2. */
467 if (dom_bb != loop->header
468 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
470 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
471 bc = bb_predicate (dom_bb);
472 if (!is_true_predicate (bc))
473 set_bb_predicate (bb, bc);
474 else
475 gcc_assert (is_true_predicate (bb_predicate (bb)));
476 if (dump_file && (dump_flags & TDF_DETAILS))
477 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
478 dom_bb->index, bb->index);
479 return;
482 if (!is_predicated (bb))
483 bc = nc;
484 else
486 bc = bb_predicate (bb);
487 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
488 if (is_true_predicate (bc))
490 reset_bb_predicate (bb);
491 return;
495 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
496 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
497 tp = &TREE_OPERAND (bc, 0);
498 else
499 tp = &bc;
500 if (!is_gimple_condexpr (*tp))
502 gimple_seq stmts;
503 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
504 add_bb_predicate_gimplified_stmts (bb, stmts);
506 set_bb_predicate (bb, bc);
509 /* Add the condition COND to the previous condition PREV_COND, and add
510 this to the predicate list of the destination of edge E. LOOP is
511 the loop to be if-converted. */
513 static void
514 add_to_dst_predicate_list (struct loop *loop, edge e,
515 tree prev_cond, tree cond)
517 if (!flow_bb_inside_loop_p (loop, e->dest))
518 return;
520 if (!is_true_predicate (prev_cond))
521 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
522 prev_cond, cond);
524 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
525 add_to_predicate_list (loop, e->dest, cond);
528 /* Return true if one of the successor edges of BB exits LOOP. */
530 static bool
531 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
533 edge e;
534 edge_iterator ei;
536 FOR_EACH_EDGE (e, ei, bb->succs)
537 if (loop_exit_edge_p (loop, e))
538 return true;
540 return false;
543 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
544 and it belongs to basic block BB.
546 PHI is not if-convertible if:
547 - it has more than 2 arguments.
549 When the flag_tree_loop_if_convert_stores is not set, PHI is not
550 if-convertible if:
551 - a virtual PHI is immediately used in another PHI node,
552 - there is a virtual PHI in a BB other than the loop->header.
553 When the aggressive_if_conv is set, PHI can have more than
554 two arguments. */
556 static bool
557 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
558 bool any_mask_load_store)
560 if (dump_file && (dump_flags & TDF_DETAILS))
562 fprintf (dump_file, "-------------------------\n");
563 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
566 if (bb != loop->header)
568 if (gimple_phi_num_args (phi) != 2
569 && !aggressive_if_conv)
571 if (dump_file && (dump_flags & TDF_DETAILS))
572 fprintf (dump_file, "More than two phi node args.\n");
573 return false;
577 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
578 return true;
580 /* When the flag_tree_loop_if_convert_stores is not set, check
581 that there are no memory writes in the branches of the loop to be
582 if-converted. */
583 if (virtual_operand_p (gimple_phi_result (phi)))
585 imm_use_iterator imm_iter;
586 use_operand_p use_p;
588 if (bb != loop->header)
590 if (dump_file && (dump_flags & TDF_DETAILS))
591 fprintf (dump_file, "Virtual phi not on loop->header.\n");
592 return false;
595 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
597 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
598 && USE_STMT (use_p) != (gimple) phi)
600 if (dump_file && (dump_flags & TDF_DETAILS))
601 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
602 return false;
607 return true;
610 /* Records the status of a data reference. This struct is attached to
611 each DR->aux field. */
613 struct ifc_dr {
614 /* -1 when not initialized, 0 when false, 1 when true. */
615 int written_at_least_once;
617 /* -1 when not initialized, 0 when false, 1 when true. */
618 int rw_unconditionally;
621 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
622 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
623 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
625 /* Returns true when the memory references of STMT are read or written
626 unconditionally. In other words, this function returns true when
627 for every data reference A in STMT there exist other accesses to
628 a data reference with the same base with predicates that add up (OR-up) to
629 the true predicate: this ensures that the data reference A is touched
630 (read or written) on every iteration of the if-converted loop. */
632 static bool
633 memrefs_read_or_written_unconditionally (gimple stmt,
634 vec<data_reference_p> drs)
636 int i, j;
637 data_reference_p a, b;
638 tree ca = bb_predicate (gimple_bb (stmt));
640 for (i = 0; drs.iterate (i, &a); i++)
641 if (DR_STMT (a) == stmt)
643 bool found = false;
644 int x = DR_RW_UNCONDITIONALLY (a);
646 if (x == 0)
647 return false;
649 if (x == 1)
650 continue;
652 for (j = 0; drs.iterate (j, &b); j++)
654 tree ref_base_a = DR_REF (a);
655 tree ref_base_b = DR_REF (b);
657 if (DR_STMT (b) == stmt)
658 continue;
660 while (TREE_CODE (ref_base_a) == COMPONENT_REF
661 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
662 || TREE_CODE (ref_base_a) == REALPART_EXPR)
663 ref_base_a = TREE_OPERAND (ref_base_a, 0);
665 while (TREE_CODE (ref_base_b) == COMPONENT_REF
666 || TREE_CODE (ref_base_b) == IMAGPART_EXPR
667 || TREE_CODE (ref_base_b) == REALPART_EXPR)
668 ref_base_b = TREE_OPERAND (ref_base_b, 0);
670 if (!operand_equal_p (ref_base_a, ref_base_b, 0))
672 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
674 if (DR_RW_UNCONDITIONALLY (b) == 1
675 || is_true_predicate (cb)
676 || is_true_predicate (ca
677 = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
679 DR_RW_UNCONDITIONALLY (a) = 1;
680 DR_RW_UNCONDITIONALLY (b) = 1;
681 found = true;
682 break;
687 if (!found)
689 DR_RW_UNCONDITIONALLY (a) = 0;
690 return false;
694 return true;
697 /* Returns true when the memory references of STMT are unconditionally
698 written. In other words, this function returns true when for every
699 data reference A written in STMT, there exist other writes to the
700 same data reference with predicates that add up (OR-up) to the true
701 predicate: this ensures that the data reference A is written on
702 every iteration of the if-converted loop. */
704 static bool
705 write_memrefs_written_at_least_once (gimple stmt,
706 vec<data_reference_p> drs)
708 int i, j;
709 data_reference_p a, b;
710 tree ca = bb_predicate (gimple_bb (stmt));
712 for (i = 0; drs.iterate (i, &a); i++)
713 if (DR_STMT (a) == stmt
714 && DR_IS_WRITE (a))
716 bool found = false;
717 int x = DR_WRITTEN_AT_LEAST_ONCE (a);
719 if (x == 0)
720 return false;
722 if (x == 1)
723 continue;
725 for (j = 0; drs.iterate (j, &b); j++)
726 if (DR_STMT (b) != stmt
727 && DR_IS_WRITE (b)
728 && same_data_refs_base_objects (a, b))
730 tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
732 if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
733 || is_true_predicate (cb)
734 || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
735 ca, cb)))
737 DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
738 DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
739 found = true;
740 break;
744 if (!found)
746 DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
747 return false;
751 return true;
754 /* Return true when the memory references of STMT won't trap in the
755 if-converted code. There are two things that we have to check for:
757 - writes to memory occur to writable memory: if-conversion of
758 memory writes transforms the conditional memory writes into
759 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
760 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
761 be executed at all in the original code, it may be a readonly
762 memory. To check that A is not const-qualified, we check that
763 there exists at least an unconditional write to A in the current
764 function.
766 - reads or writes to memory are valid memory accesses for every
767 iteration. To check that the memory accesses are correctly formed
768 and that we are allowed to read and write in these locations, we
769 check that the memory accesses to be if-converted occur at every
770 iteration unconditionally. */
772 static bool
773 ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
775 return write_memrefs_written_at_least_once (stmt, refs)
776 && memrefs_read_or_written_unconditionally (stmt, refs);
779 /* Wrapper around gimple_could_trap_p refined for the needs of the
780 if-conversion. Try to prove that the memory accesses of STMT could
781 not trap in the innermost loop containing STMT. */
783 static bool
784 ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
786 if (gimple_vuse (stmt)
787 && !gimple_could_trap_p_1 (stmt, false, false)
788 && ifcvt_memrefs_wont_trap (stmt, refs))
789 return false;
791 return gimple_could_trap_p (stmt);
794 /* Return true if STMT could be converted into a masked load or store
795 (conditional load or store based on a mask computed from bb predicate). */
797 static bool
798 ifcvt_can_use_mask_load_store (gimple stmt)
800 tree lhs, ref;
801 machine_mode mode;
802 basic_block bb = gimple_bb (stmt);
803 bool is_load;
805 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
806 || bb->loop_father->dont_vectorize
807 || !gimple_assign_single_p (stmt)
808 || gimple_has_volatile_ops (stmt))
809 return false;
811 /* Check whether this is a load or store. */
812 lhs = gimple_assign_lhs (stmt);
813 if (gimple_store_p (stmt))
815 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
816 return false;
817 is_load = false;
818 ref = lhs;
820 else if (gimple_assign_load_p (stmt))
822 is_load = true;
823 ref = gimple_assign_rhs1 (stmt);
825 else
826 return false;
828 if (may_be_nonaddressable_p (ref))
829 return false;
831 /* Mask should be integer mode of the same size as the load/store
832 mode. */
833 mode = TYPE_MODE (TREE_TYPE (lhs));
834 if (int_mode_for_mode (mode) == BLKmode
835 || VECTOR_MODE_P (mode))
836 return false;
838 if (can_vec_mask_load_store_p (mode, is_load))
839 return true;
841 return false;
844 /* Return true when STMT is if-convertible.
846 GIMPLE_ASSIGN statement is not if-convertible if,
847 - it is not movable,
848 - it could trap,
849 - LHS is not var decl. */
851 static bool
852 if_convertible_gimple_assign_stmt_p (gimple stmt,
853 vec<data_reference_p> refs,
854 bool *any_mask_load_store)
856 tree lhs = gimple_assign_lhs (stmt);
857 basic_block bb;
859 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (dump_file, "-------------------------\n");
862 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
865 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
866 return false;
868 /* Some of these constrains might be too conservative. */
869 if (stmt_ends_bb_p (stmt)
870 || gimple_has_volatile_ops (stmt)
871 || (TREE_CODE (lhs) == SSA_NAME
872 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
873 || gimple_has_side_effects (stmt))
875 if (dump_file && (dump_flags & TDF_DETAILS))
876 fprintf (dump_file, "stmt not suitable for ifcvt\n");
877 return false;
880 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
881 in between if_convertible_loop_p and combine_blocks
882 we can perform loop versioning. */
883 gimple_set_plf (stmt, GF_PLF_2, false);
885 if (flag_tree_loop_if_convert_stores)
887 if (ifcvt_could_trap_p (stmt, refs))
889 if (ifcvt_can_use_mask_load_store (stmt))
891 gimple_set_plf (stmt, GF_PLF_2, true);
892 *any_mask_load_store = true;
893 return true;
895 if (dump_file && (dump_flags & TDF_DETAILS))
896 fprintf (dump_file, "tree could trap...\n");
897 return false;
899 return true;
902 if (gimple_assign_rhs_could_trap_p (stmt))
904 if (ifcvt_can_use_mask_load_store (stmt))
906 gimple_set_plf (stmt, GF_PLF_2, true);
907 *any_mask_load_store = true;
908 return true;
910 if (dump_file && (dump_flags & TDF_DETAILS))
911 fprintf (dump_file, "tree could trap...\n");
912 return false;
915 bb = gimple_bb (stmt);
917 if (TREE_CODE (lhs) != SSA_NAME
918 && bb != bb->loop_father->header
919 && !bb_with_exit_edge_p (bb->loop_father, bb))
921 if (ifcvt_can_use_mask_load_store (stmt))
923 gimple_set_plf (stmt, GF_PLF_2, true);
924 *any_mask_load_store = true;
925 return true;
927 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "LHS is not var\n");
930 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
932 return false;
935 return true;
938 /* Return true when STMT is if-convertible.
940 A statement is if-convertible if:
941 - it is an if-convertible GIMPLE_ASSIGN,
942 - it is a GIMPLE_LABEL or a GIMPLE_COND,
943 - it is builtins call. */
945 static bool
946 if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
947 bool *any_mask_load_store)
949 switch (gimple_code (stmt))
951 case GIMPLE_LABEL:
952 case GIMPLE_DEBUG:
953 case GIMPLE_COND:
954 return true;
956 case GIMPLE_ASSIGN:
957 return if_convertible_gimple_assign_stmt_p (stmt, refs,
958 any_mask_load_store);
960 case GIMPLE_CALL:
962 tree fndecl = gimple_call_fndecl (stmt);
963 if (fndecl)
965 int flags = gimple_call_flags (stmt);
966 if ((flags & ECF_CONST)
967 && !(flags & ECF_LOOPING_CONST_OR_PURE)
968 /* We can only vectorize some builtins at the moment,
969 so restrict if-conversion to those. */
970 && DECL_BUILT_IN (fndecl))
971 return true;
973 return false;
976 default:
977 /* Don't know what to do with 'em so don't do anything. */
978 if (dump_file && (dump_flags & TDF_DETAILS))
980 fprintf (dump_file, "don't know what to do\n");
981 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
983 return false;
984 break;
987 return true;
990 /* Assumes that BB has more than 1 predecessors.
991 Returns false if at least one successor is not on critical edge
992 and true otherwise. */
994 static inline bool
995 all_preds_critical_p (basic_block bb)
997 edge e;
998 edge_iterator ei;
1000 FOR_EACH_EDGE (e, ei, bb->preds)
1001 if (EDGE_COUNT (e->src->succs) == 1)
1002 return false;
1003 return true;
1006 /* Returns true if at least one successor in on critical edge. */
1007 static inline bool
1008 has_pred_critical_p (basic_block bb)
1010 edge e;
1011 edge_iterator ei;
1013 FOR_EACH_EDGE (e, ei, bb->preds)
1014 if (EDGE_COUNT (e->src->succs) > 1)
1015 return true;
1016 return false;
1019 /* Return true when BB is if-convertible. This routine does not check
1020 basic block's statements and phis.
1022 A basic block is not if-convertible if:
1023 - it is non-empty and it is after the exit block (in BFS order),
1024 - it is after the exit block but before the latch,
1025 - its edges are not normal.
1027 Last restriction is valid if aggressive_if_conv is false.
1029 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1030 inside LOOP. */
1032 static bool
1033 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
1035 edge e;
1036 edge_iterator ei;
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1041 if (EDGE_COUNT (bb->succs) > 2)
1042 return false;
1044 if (EDGE_COUNT (bb->preds) > 2
1045 && !aggressive_if_conv)
1046 return false;
1048 if (exit_bb)
1050 if (bb != loop->latch)
1052 if (dump_file && (dump_flags & TDF_DETAILS))
1053 fprintf (dump_file, "basic block after exit bb but before latch\n");
1054 return false;
1056 else if (!empty_block_p (bb))
1058 if (dump_file && (dump_flags & TDF_DETAILS))
1059 fprintf (dump_file, "non empty basic block after exit bb\n");
1060 return false;
1062 else if (bb == loop->latch
1063 && bb != exit_bb
1064 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1066 if (dump_file && (dump_flags & TDF_DETAILS))
1067 fprintf (dump_file, "latch is not dominated by exit_block\n");
1068 return false;
1072 /* Be less adventurous and handle only normal edges. */
1073 FOR_EACH_EDGE (e, ei, bb->succs)
1074 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1076 if (dump_file && (dump_flags & TDF_DETAILS))
1077 fprintf (dump_file, "Difficult to handle edges\n");
1078 return false;
1081 /* At least one incoming edge has to be non-critical as otherwise edge
1082 predicates are not equal to basic-block predicates of the edge
1083 source. This check is skipped if aggressive_if_conv is true. */
1084 if (!aggressive_if_conv
1085 && EDGE_COUNT (bb->preds) > 1
1086 && bb != loop->header
1087 && all_preds_critical_p (bb))
1089 if (dump_file && (dump_flags & TDF_DETAILS))
1090 fprintf (dump_file, "only critical predecessors\n");
1091 return false;
1094 return true;
1097 /* Return true when all predecessor blocks of BB are visited. The
1098 VISITED bitmap keeps track of the visited blocks. */
1100 static bool
1101 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1103 edge e;
1104 edge_iterator ei;
1105 FOR_EACH_EDGE (e, ei, bb->preds)
1106 if (!bitmap_bit_p (*visited, e->src->index))
1107 return false;
1109 return true;
1112 /* Get body of a LOOP in suitable order for if-conversion. It is
1113 caller's responsibility to deallocate basic block list.
1114 If-conversion suitable order is, breadth first sort (BFS) order
1115 with an additional constraint: select a block only if all its
1116 predecessors are already selected. */
1118 static basic_block *
1119 get_loop_body_in_if_conv_order (const struct loop *loop)
1121 basic_block *blocks, *blocks_in_bfs_order;
1122 basic_block bb;
1123 bitmap visited;
1124 unsigned int index = 0;
1125 unsigned int visited_count = 0;
1127 gcc_assert (loop->num_nodes);
1128 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1130 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1131 visited = BITMAP_ALLOC (NULL);
1133 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1135 index = 0;
1136 while (index < loop->num_nodes)
1138 bb = blocks_in_bfs_order [index];
1140 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1142 free (blocks_in_bfs_order);
1143 BITMAP_FREE (visited);
1144 free (blocks);
1145 return NULL;
1148 if (!bitmap_bit_p (visited, bb->index))
1150 if (pred_blocks_visited_p (bb, &visited)
1151 || bb == loop->header)
1153 /* This block is now visited. */
1154 bitmap_set_bit (visited, bb->index);
1155 blocks[visited_count++] = bb;
1159 index++;
1161 if (index == loop->num_nodes
1162 && visited_count != loop->num_nodes)
1163 /* Not done yet. */
1164 index = 0;
1166 free (blocks_in_bfs_order);
1167 BITMAP_FREE (visited);
1168 return blocks;
1171 /* Returns true when the analysis of the predicates for all the basic
1172 blocks in LOOP succeeded.
1174 predicate_bbs first allocates the predicates of the basic blocks.
1175 These fields are then initialized with the tree expressions
1176 representing the predicates under which a basic block is executed
1177 in the LOOP. As the loop->header is executed at each iteration, it
1178 has the "true" predicate. Other statements executed under a
1179 condition are predicated with that condition, for example
1181 | if (x)
1182 | S1;
1183 | else
1184 | S2;
1186 S1 will be predicated with "x", and
1187 S2 will be predicated with "!x". */
1189 static void
1190 predicate_bbs (loop_p loop)
1192 unsigned int i;
1194 for (i = 0; i < loop->num_nodes; i++)
1195 init_bb_predicate (ifc_bbs[i]);
1197 for (i = 0; i < loop->num_nodes; i++)
1199 basic_block bb = ifc_bbs[i];
1200 tree cond;
1201 gimple stmt;
1203 /* The loop latch and loop exit block are always executed and
1204 have no extra conditions to be processed: skip them. */
1205 if (bb == loop->latch
1206 || bb_with_exit_edge_p (loop, bb))
1208 reset_bb_predicate (bb);
1209 continue;
1212 cond = bb_predicate (bb);
1213 stmt = last_stmt (bb);
1214 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1216 tree c2;
1217 edge true_edge, false_edge;
1218 location_t loc = gimple_location (stmt);
1219 tree c = build2_loc (loc, gimple_cond_code (stmt),
1220 boolean_type_node,
1221 gimple_cond_lhs (stmt),
1222 gimple_cond_rhs (stmt));
1224 /* Add new condition into destination's predicate list. */
1225 extract_true_false_edges_from_block (gimple_bb (stmt),
1226 &true_edge, &false_edge);
1228 /* If C is true, then TRUE_EDGE is taken. */
1229 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1230 unshare_expr (c));
1232 /* If C is false, then FALSE_EDGE is taken. */
1233 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1234 unshare_expr (c));
1235 add_to_dst_predicate_list (loop, false_edge,
1236 unshare_expr (cond), c2);
1238 cond = NULL_TREE;
1241 /* If current bb has only one successor, then consider it as an
1242 unconditional goto. */
1243 if (single_succ_p (bb))
1245 basic_block bb_n = single_succ (bb);
1247 /* The successor bb inherits the predicate of its
1248 predecessor. If there is no predicate in the predecessor
1249 bb, then consider the successor bb as always executed. */
1250 if (cond == NULL_TREE)
1251 cond = boolean_true_node;
1253 add_to_predicate_list (loop, bb_n, cond);
1257 /* The loop header is always executed. */
1258 reset_bb_predicate (loop->header);
1259 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1260 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1263 /* Return true when LOOP is if-convertible. This is a helper function
1264 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1265 in if_convertible_loop_p. */
1267 static bool
1268 if_convertible_loop_p_1 (struct loop *loop,
1269 vec<loop_p> *loop_nest,
1270 vec<data_reference_p> *refs,
1271 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1273 bool res;
1274 unsigned int i;
1275 basic_block exit_bb = NULL;
1277 /* Don't if-convert the loop when the data dependences cannot be
1278 computed: the loop won't be vectorized in that case. */
1279 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1280 if (!res)
1281 return false;
1283 calculate_dominance_info (CDI_DOMINATORS);
1284 calculate_dominance_info (CDI_POST_DOMINATORS);
1286 /* Allow statements that can be handled during if-conversion. */
1287 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1288 if (!ifc_bbs)
1290 if (dump_file && (dump_flags & TDF_DETAILS))
1291 fprintf (dump_file, "Irreducible loop\n");
1292 return false;
1295 for (i = 0; i < loop->num_nodes; i++)
1297 basic_block bb = ifc_bbs[i];
1299 if (!if_convertible_bb_p (loop, bb, exit_bb))
1300 return false;
1302 if (bb_with_exit_edge_p (loop, bb))
1303 exit_bb = bb;
1306 for (i = 0; i < loop->num_nodes; i++)
1308 basic_block bb = ifc_bbs[i];
1309 gimple_stmt_iterator gsi;
1311 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1312 switch (gimple_code (gsi_stmt (gsi)))
1314 case GIMPLE_LABEL:
1315 case GIMPLE_ASSIGN:
1316 case GIMPLE_CALL:
1317 case GIMPLE_DEBUG:
1318 case GIMPLE_COND:
1319 break;
1320 default:
1321 return false;
1325 if (flag_tree_loop_if_convert_stores)
1327 data_reference_p dr;
1329 for (i = 0; refs->iterate (i, &dr); i++)
1331 dr->aux = XNEW (struct ifc_dr);
1332 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1333 DR_RW_UNCONDITIONALLY (dr) = -1;
1335 predicate_bbs (loop);
1338 for (i = 0; i < loop->num_nodes; i++)
1340 basic_block bb = ifc_bbs[i];
1341 gimple_stmt_iterator itr;
1343 /* Check the if-convertibility of statements in predicated BBs. */
1344 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1345 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1346 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1347 any_mask_load_store))
1348 return false;
1351 if (flag_tree_loop_if_convert_stores)
1352 for (i = 0; i < loop->num_nodes; i++)
1353 free_bb_predicate (ifc_bbs[i]);
1355 /* Checking PHIs needs to be done after stmts, as the fact whether there
1356 are any masked loads or stores affects the tests. */
1357 for (i = 0; i < loop->num_nodes; i++)
1359 basic_block bb = ifc_bbs[i];
1360 gphi_iterator itr;
1362 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1363 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1364 *any_mask_load_store))
1365 return false;
1368 if (dump_file)
1369 fprintf (dump_file, "Applying if-conversion\n");
1371 return true;
1374 /* Return true when LOOP is if-convertible.
1375 LOOP is if-convertible if:
1376 - it is innermost,
1377 - it has two or more basic blocks,
1378 - it has only one exit,
1379 - loop header is not the exit edge,
1380 - if its basic blocks and phi nodes are if convertible. */
1382 static bool
1383 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1385 edge e;
1386 edge_iterator ei;
1387 bool res = false;
1388 vec<data_reference_p> refs;
1389 vec<ddr_p> ddrs;
1391 /* Handle only innermost loop. */
1392 if (!loop || loop->inner)
1394 if (dump_file && (dump_flags & TDF_DETAILS))
1395 fprintf (dump_file, "not innermost loop\n");
1396 return false;
1399 /* If only one block, no need for if-conversion. */
1400 if (loop->num_nodes <= 2)
1402 if (dump_file && (dump_flags & TDF_DETAILS))
1403 fprintf (dump_file, "less than 2 basic blocks\n");
1404 return false;
1407 /* More than one loop exit is too much to handle. */
1408 if (!single_exit (loop))
1410 if (dump_file && (dump_flags & TDF_DETAILS))
1411 fprintf (dump_file, "multiple exits\n");
1412 return false;
1415 /* If one of the loop header's edge is an exit edge then do not
1416 apply if-conversion. */
1417 FOR_EACH_EDGE (e, ei, loop->header->succs)
1418 if (loop_exit_edge_p (loop, e))
1419 return false;
1421 refs.create (5);
1422 ddrs.create (25);
1423 auto_vec<loop_p, 3> loop_nest;
1424 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1425 any_mask_load_store);
1427 if (flag_tree_loop_if_convert_stores)
1429 data_reference_p dr;
1430 unsigned int i;
1432 for (i = 0; refs.iterate (i, &dr); i++)
1433 free (dr->aux);
1436 free_data_refs (refs);
1437 free_dependence_relations (ddrs);
1438 return res;
1441 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1442 which is in predicated basic block.
1443 In fact, the following PHI pattern is searching:
1444 loop-header:
1445 reduc_1 = PHI <..., reduc_2>
1447 if (...)
1448 reduc_3 = ...
1449 reduc_2 = PHI <reduc_1, reduc_3>
1451 ARG_0 and ARG_1 are correspondent PHI arguments.
1452 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1453 EXTENDED is true if PHI has > 2 arguments. */
1455 static bool
1456 is_cond_scalar_reduction (gimple phi, gimple *reduc, tree arg_0, tree arg_1,
1457 tree *op0, tree *op1, bool extended)
1459 tree lhs, r_op1, r_op2;
1460 gimple stmt;
1461 gimple header_phi = NULL;
1462 enum tree_code reduction_op;
1463 basic_block bb = gimple_bb (phi);
1464 struct loop *loop = bb->loop_father;
1465 edge latch_e = loop_latch_edge (loop);
1466 imm_use_iterator imm_iter;
1467 use_operand_p use_p;
1468 edge e;
1469 edge_iterator ei;
1470 bool result = false;
1471 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1472 return false;
1474 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1476 lhs = arg_1;
1477 header_phi = SSA_NAME_DEF_STMT (arg_0);
1478 stmt = SSA_NAME_DEF_STMT (arg_1);
1480 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1482 lhs = arg_0;
1483 header_phi = SSA_NAME_DEF_STMT (arg_1);
1484 stmt = SSA_NAME_DEF_STMT (arg_0);
1486 else
1487 return false;
1488 if (gimple_bb (header_phi) != loop->header)
1489 return false;
1491 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1492 return false;
1494 if (gimple_code (stmt) != GIMPLE_ASSIGN
1495 || gimple_has_volatile_ops (stmt))
1496 return false;
1498 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1499 return false;
1501 if (!is_predicated (gimple_bb (stmt)))
1502 return false;
1504 /* Check that stmt-block is predecessor of phi-block. */
1505 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1506 if (e->dest == bb)
1508 result = true;
1509 break;
1511 if (!result)
1512 return false;
1514 if (!has_single_use (lhs))
1515 return false;
1517 reduction_op = gimple_assign_rhs_code (stmt);
1518 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1519 return false;
1520 r_op1 = gimple_assign_rhs1 (stmt);
1521 r_op2 = gimple_assign_rhs2 (stmt);
1523 /* Make R_OP1 to hold reduction variable. */
1524 if (r_op2 == PHI_RESULT (header_phi)
1525 && reduction_op == PLUS_EXPR)
1527 tree tmp = r_op1;
1528 r_op1 = r_op2;
1529 r_op2 = tmp;
1531 else if (r_op1 != PHI_RESULT (header_phi))
1532 return false;
1534 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1535 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1537 gimple use_stmt = USE_STMT (use_p);
1538 if (is_gimple_debug (use_stmt))
1539 continue;
1540 if (use_stmt == stmt)
1541 continue;
1542 if (gimple_code (use_stmt) != GIMPLE_PHI)
1543 return false;
1546 *op0 = r_op1; *op1 = r_op2;
1547 *reduc = stmt;
1548 return true;
1551 /* Converts conditional scalar reduction into unconditional form, e.g.
1552 bb_4
1553 if (_5 != 0) goto bb_5 else goto bb_6
1554 end_bb_4
1555 bb_5
1556 res_6 = res_13 + 1;
1557 end_bb_5
1558 bb_6
1559 # res_2 = PHI <res_13(4), res_6(5)>
1560 end_bb_6
1562 will be converted into sequence
1563 _ifc__1 = _5 != 0 ? 1 : 0;
1564 res_2 = res_13 + _ifc__1;
1565 Argument SWAP tells that arguments of conditional expression should be
1566 swapped.
1567 Returns rhs of resulting PHI assignment. */
1569 static tree
1570 convert_scalar_cond_reduction (gimple reduc, gimple_stmt_iterator *gsi,
1571 tree cond, tree op0, tree op1, bool swap)
1573 gimple_stmt_iterator stmt_it;
1574 gimple new_assign;
1575 tree rhs;
1576 tree rhs1 = gimple_assign_rhs1 (reduc);
1577 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1578 tree c;
1579 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1581 if (dump_file && (dump_flags & TDF_DETAILS))
1583 fprintf (dump_file, "Found cond scalar reduction.\n");
1584 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1587 /* Build cond expression using COND and constant operand
1588 of reduction rhs. */
1589 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1590 unshare_expr (cond),
1591 swap ? zero : op1,
1592 swap ? op1 : zero);
1594 /* Create assignment stmt and insert it at GSI. */
1595 new_assign = gimple_build_assign (tmp, c);
1596 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1597 /* Build rhs for unconditional increment/decrement. */
1598 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1599 TREE_TYPE (rhs1), op0, tmp);
1601 /* Delete original reduction stmt. */
1602 stmt_it = gsi_for_stmt (reduc);
1603 gsi_remove (&stmt_it, true);
1604 release_defs (reduc);
1605 return rhs;
1608 /* Helpers for PHI arguments hashtable map. */
1610 struct phi_args_hash_traits : default_hashmap_traits
1612 static inline hashval_t hash (tree);
1613 static inline bool equal_keys (tree, tree);
1616 inline hashval_t
1617 phi_args_hash_traits::hash (tree value)
1619 return iterative_hash_expr (value, 0);
1622 inline bool
1623 phi_args_hash_traits::equal_keys (tree value1, tree value2)
1625 return operand_equal_p (value1, value2, 0);
1628 /* Produce condition for all occurrences of ARG in PHI node. */
1630 static tree
1631 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1632 gimple_stmt_iterator *gsi)
1634 int len;
1635 int i;
1636 tree cond = NULL_TREE;
1637 tree c;
1638 edge e;
1640 len = occur->length ();
1641 gcc_assert (len > 0);
1642 for (i = 0; i < len; i++)
1644 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1645 c = bb_predicate (e->src);
1646 if (is_true_predicate (c))
1647 continue;
1648 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1649 is_gimple_condexpr, NULL_TREE,
1650 true, GSI_SAME_STMT);
1651 if (cond != NULL_TREE)
1653 /* Must build OR expression. */
1654 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1655 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1656 is_gimple_condexpr, NULL_TREE,
1657 true, GSI_SAME_STMT);
1659 else
1660 cond = c;
1662 gcc_assert (cond != NULL_TREE);
1663 return cond;
1666 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1667 This routine can handle PHI nodes with more than two arguments.
1669 For example,
1670 S1: A = PHI <x1(1), x2(5)>
1671 is converted into,
1672 S2: A = cond ? x1 : x2;
1674 The generated code is inserted at GSI that points to the top of
1675 basic block's statement list.
1676 If PHI node has more than two arguments a chain of conditional
1677 expression is produced. */
1680 static void
1681 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1683 gimple new_stmt = NULL, reduc;
1684 tree rhs, res, arg0, arg1, op0, op1, scev;
1685 tree cond;
1686 unsigned int index0;
1687 unsigned int max, args_len;
1688 edge e;
1689 basic_block bb;
1690 unsigned int i;
1692 res = gimple_phi_result (phi);
1693 if (virtual_operand_p (res))
1694 return;
1696 if ((rhs = degenerate_phi_result (phi))
1697 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1698 res))
1699 && !chrec_contains_undetermined (scev)
1700 && scev != res
1701 && (rhs = gimple_phi_arg_def (phi, 0))))
1703 if (dump_file && (dump_flags & TDF_DETAILS))
1705 fprintf (dump_file, "Degenerate phi!\n");
1706 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1708 new_stmt = gimple_build_assign (res, rhs);
1709 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1710 update_stmt (new_stmt);
1711 return;
1714 bb = gimple_bb (phi);
1715 if (EDGE_COUNT (bb->preds) == 2)
1717 /* Predicate ordinary PHI node with 2 arguments. */
1718 edge first_edge, second_edge;
1719 basic_block true_bb;
1720 first_edge = EDGE_PRED (bb, 0);
1721 second_edge = EDGE_PRED (bb, 1);
1722 cond = bb_predicate (first_edge->src);
1723 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1725 edge tmp_edge = first_edge;
1726 first_edge = second_edge;
1727 second_edge = tmp_edge;
1729 if (EDGE_COUNT (first_edge->src->succs) > 1)
1731 cond = bb_predicate (second_edge->src);
1732 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1733 cond = TREE_OPERAND (cond, 0);
1734 else
1735 first_edge = second_edge;
1737 else
1738 cond = bb_predicate (first_edge->src);
1739 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1740 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1741 is_gimple_condexpr, NULL_TREE,
1742 true, GSI_SAME_STMT);
1743 true_bb = first_edge->src;
1744 if (EDGE_PRED (bb, 1)->src == true_bb)
1746 arg0 = gimple_phi_arg_def (phi, 1);
1747 arg1 = gimple_phi_arg_def (phi, 0);
1749 else
1751 arg0 = gimple_phi_arg_def (phi, 0);
1752 arg1 = gimple_phi_arg_def (phi, 1);
1754 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1755 &op0, &op1, false))
1756 /* Convert reduction stmt into vectorizable form. */
1757 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1758 true_bb != gimple_bb (reduc));
1759 else
1760 /* Build new RHS using selected condition and arguments. */
1761 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1762 arg0, arg1);
1763 new_stmt = gimple_build_assign (res, rhs);
1764 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1765 update_stmt (new_stmt);
1767 if (dump_file && (dump_flags & TDF_DETAILS))
1769 fprintf (dump_file, "new phi replacement stmt\n");
1770 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1772 return;
1775 /* Create hashmap for PHI node which contain vector of argument indexes
1776 having the same value. */
1777 bool swap = false;
1778 hash_map<tree, auto_vec<int>, phi_args_hash_traits> phi_arg_map;
1779 unsigned int num_args = gimple_phi_num_args (phi);
1780 int max_ind = -1;
1781 /* Vector of different PHI argument values. */
1782 auto_vec<tree> args (num_args);
1784 /* Compute phi_arg_map. */
1785 for (i = 0; i < num_args; i++)
1787 tree arg;
1789 arg = gimple_phi_arg_def (phi, i);
1790 if (!phi_arg_map.get (arg))
1791 args.quick_push (arg);
1792 phi_arg_map.get_or_insert (arg).safe_push (i);
1795 /* Determine element with max number of occurrences. */
1796 max_ind = -1;
1797 max = 1;
1798 args_len = args.length ();
1799 for (i = 0; i < args_len; i++)
1801 unsigned int len;
1802 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1804 max_ind = (int) i;
1805 max = len;
1809 /* Put element with max number of occurences to the end of ARGS. */
1810 if (max_ind != -1 && max_ind +1 != (int) args_len)
1812 tree tmp = args[args_len - 1];
1813 args[args_len - 1] = args[max_ind];
1814 args[max_ind] = tmp;
1817 /* Handle one special case when number of arguments with different values
1818 is equal 2 and one argument has the only occurrence. Such PHI can be
1819 handled as if would have only 2 arguments. */
1820 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1822 vec<int> *indexes;
1823 indexes = phi_arg_map.get (args[0]);
1824 index0 = (*indexes)[0];
1825 arg0 = args[0];
1826 arg1 = args[1];
1827 e = gimple_phi_arg_edge (phi, index0);
1828 cond = bb_predicate (e->src);
1829 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1831 swap = true;
1832 cond = TREE_OPERAND (cond, 0);
1834 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1835 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1836 is_gimple_condexpr, NULL_TREE,
1837 true, GSI_SAME_STMT);
1838 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1839 &op0, &op1, true)))
1840 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1841 swap? arg1 : arg0,
1842 swap? arg0 : arg1);
1843 else
1844 /* Convert reduction stmt into vectorizable form. */
1845 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1846 swap);
1847 new_stmt = gimple_build_assign (res, rhs);
1848 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1849 update_stmt (new_stmt);
1851 else
1853 /* Common case. */
1854 vec<int> *indexes;
1855 tree type = TREE_TYPE (gimple_phi_result (phi));
1856 tree lhs;
1857 arg1 = args[1];
1858 for (i = 0; i < args_len; i++)
1860 arg0 = args[i];
1861 indexes = phi_arg_map.get (args[i]);
1862 if (i != args_len - 1)
1863 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1864 else
1865 lhs = res;
1866 cond = gen_phi_arg_condition (phi, indexes, gsi);
1867 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1868 arg0, arg1);
1869 new_stmt = gimple_build_assign (lhs, rhs);
1870 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1871 update_stmt (new_stmt);
1872 arg1 = lhs;
1876 if (dump_file && (dump_flags & TDF_DETAILS))
1878 fprintf (dump_file, "new extended phi replacement stmt\n");
1879 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1883 /* Replaces in LOOP all the scalar phi nodes other than those in the
1884 LOOP->header block with conditional modify expressions. */
1886 static void
1887 predicate_all_scalar_phis (struct loop *loop)
1889 basic_block bb;
1890 unsigned int orig_loop_num_nodes = loop->num_nodes;
1891 unsigned int i;
1893 for (i = 1; i < orig_loop_num_nodes; i++)
1895 gphi *phi;
1896 gimple_stmt_iterator gsi;
1897 gphi_iterator phi_gsi;
1898 bb = ifc_bbs[i];
1900 if (bb == loop->header)
1901 continue;
1903 if (EDGE_COUNT (bb->preds) == 1)
1904 continue;
1906 phi_gsi = gsi_start_phis (bb);
1907 if (gsi_end_p (phi_gsi))
1908 continue;
1910 gsi = gsi_after_labels (bb);
1911 while (!gsi_end_p (phi_gsi))
1913 phi = phi_gsi.phi ();
1914 predicate_scalar_phi (phi, &gsi);
1915 release_phi_node (phi);
1916 gsi_next (&phi_gsi);
1919 set_phi_nodes (bb, NULL);
1923 /* Insert in each basic block of LOOP the statements produced by the
1924 gimplification of the predicates. */
1926 static void
1927 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1929 unsigned int i;
1931 for (i = 0; i < loop->num_nodes; i++)
1933 basic_block bb = ifc_bbs[i];
1934 gimple_seq stmts;
1935 if (!is_predicated (bb))
1936 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
1937 if (!is_predicated (bb))
1939 /* Do not insert statements for a basic block that is not
1940 predicated. Also make sure that the predicate of the
1941 basic block is set to true. */
1942 reset_bb_predicate (bb);
1943 continue;
1946 stmts = bb_predicate_gimplified_stmts (bb);
1947 if (stmts)
1949 if (flag_tree_loop_if_convert_stores
1950 || any_mask_load_store)
1952 /* Insert the predicate of the BB just after the label,
1953 as the if-conversion of memory writes will use this
1954 predicate. */
1955 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1956 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1958 else
1960 /* Insert the predicate of the BB at the end of the BB
1961 as this would reduce the register pressure: the only
1962 use of this predicate will be in successor BBs. */
1963 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1965 if (gsi_end_p (gsi)
1966 || stmt_ends_bb_p (gsi_stmt (gsi)))
1967 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1968 else
1969 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1972 /* Once the sequence is code generated, set it to NULL. */
1973 set_bb_predicate_gimplified_stmts (bb, NULL);
1978 /* Helper function for predicate_mem_writes. Returns index of existent
1979 mask if it was created for given SIZE and -1 otherwise. */
1981 static int
1982 mask_exists (int size, vec<int> vec)
1984 unsigned int ix;
1985 int v;
1986 FOR_EACH_VEC_ELT (vec, ix, v)
1987 if (v == size)
1988 return (int) ix;
1989 return -1;
1992 /* Predicate each write to memory in LOOP.
1994 This function transforms control flow constructs containing memory
1995 writes of the form:
1997 | for (i = 0; i < N; i++)
1998 | if (cond)
1999 | A[i] = expr;
2001 into the following form that does not contain control flow:
2003 | for (i = 0; i < N; i++)
2004 | A[i] = cond ? expr : A[i];
2006 The original CFG looks like this:
2008 | bb_0
2009 | i = 0
2010 | end_bb_0
2012 | bb_1
2013 | if (i < N) goto bb_5 else goto bb_2
2014 | end_bb_1
2016 | bb_2
2017 | cond = some_computation;
2018 | if (cond) goto bb_3 else goto bb_4
2019 | end_bb_2
2021 | bb_3
2022 | A[i] = expr;
2023 | goto bb_4
2024 | end_bb_3
2026 | bb_4
2027 | goto bb_1
2028 | end_bb_4
2030 insert_gimplified_predicates inserts the computation of the COND
2031 expression at the beginning of the destination basic block:
2033 | bb_0
2034 | i = 0
2035 | end_bb_0
2037 | bb_1
2038 | if (i < N) goto bb_5 else goto bb_2
2039 | end_bb_1
2041 | bb_2
2042 | cond = some_computation;
2043 | if (cond) goto bb_3 else goto bb_4
2044 | end_bb_2
2046 | bb_3
2047 | cond = some_computation;
2048 | A[i] = expr;
2049 | goto bb_4
2050 | end_bb_3
2052 | bb_4
2053 | goto bb_1
2054 | end_bb_4
2056 predicate_mem_writes is then predicating the memory write as follows:
2058 | bb_0
2059 | i = 0
2060 | end_bb_0
2062 | bb_1
2063 | if (i < N) goto bb_5 else goto bb_2
2064 | end_bb_1
2066 | bb_2
2067 | if (cond) goto bb_3 else goto bb_4
2068 | end_bb_2
2070 | bb_3
2071 | cond = some_computation;
2072 | A[i] = cond ? expr : A[i];
2073 | goto bb_4
2074 | end_bb_3
2076 | bb_4
2077 | goto bb_1
2078 | end_bb_4
2080 and finally combine_blocks removes the basic block boundaries making
2081 the loop vectorizable:
2083 | bb_0
2084 | i = 0
2085 | if (i < N) goto bb_5 else goto bb_1
2086 | end_bb_0
2088 | bb_1
2089 | cond = some_computation;
2090 | A[i] = cond ? expr : A[i];
2091 | if (i < N) goto bb_5 else goto bb_4
2092 | end_bb_1
2094 | bb_4
2095 | goto bb_1
2096 | end_bb_4
2099 static void
2100 predicate_mem_writes (loop_p loop)
2102 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2103 auto_vec<int, 1> vect_sizes;
2104 auto_vec<tree, 1> vect_masks;
2106 for (i = 1; i < orig_loop_num_nodes; i++)
2108 gimple_stmt_iterator gsi;
2109 basic_block bb = ifc_bbs[i];
2110 tree cond = bb_predicate (bb);
2111 bool swap;
2112 gimple stmt;
2113 int index;
2115 if (is_true_predicate (cond))
2116 continue;
2118 swap = false;
2119 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2121 swap = true;
2122 cond = TREE_OPERAND (cond, 0);
2125 vect_sizes.truncate (0);
2126 vect_masks.truncate (0);
2128 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2129 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2130 continue;
2131 else if (gimple_plf (stmt, GF_PLF_2))
2133 tree lhs = gimple_assign_lhs (stmt);
2134 tree rhs = gimple_assign_rhs1 (stmt);
2135 tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
2136 gimple new_stmt;
2137 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
2138 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2139 mark_addressable (ref);
2140 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2141 true, NULL_TREE, true,
2142 GSI_SAME_STMT);
2143 if (!vect_sizes.is_empty ()
2144 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2145 /* Use created mask. */
2146 mask = vect_masks[index];
2147 else
2149 masktype = build_nonstandard_integer_type (bitsize, 1);
2150 mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
2151 mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
2152 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2153 is_gimple_condexpr,
2154 NULL_TREE,
2155 true, GSI_SAME_STMT);
2156 mask = fold_build_cond_expr (masktype, unshare_expr (cond),
2157 mask_op0, mask_op1);
2158 mask = ifc_temp_var (masktype, mask, &gsi);
2159 /* Save mask and its size for further use. */
2160 vect_sizes.safe_push (bitsize);
2161 vect_masks.safe_push (mask);
2163 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
2164 /* Copy points-to info if possible. */
2165 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2166 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2167 ref);
2168 if (TREE_CODE (lhs) == SSA_NAME)
2170 new_stmt
2171 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2172 ptr, mask);
2173 gimple_call_set_lhs (new_stmt, lhs);
2175 else
2176 new_stmt
2177 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2178 mask, rhs);
2179 gsi_replace (&gsi, new_stmt, true);
2181 else if (gimple_vdef (stmt))
2183 tree lhs = gimple_assign_lhs (stmt);
2184 tree rhs = gimple_assign_rhs1 (stmt);
2185 tree type = TREE_TYPE (lhs);
2187 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2188 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2189 if (swap)
2191 tree tem = lhs;
2192 lhs = rhs;
2193 rhs = tem;
2195 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2196 is_gimple_condexpr, NULL_TREE,
2197 true, GSI_SAME_STMT);
2198 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2199 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2200 update_stmt (stmt);
2205 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2206 other than the exit and latch of the LOOP. Also resets the
2207 GIMPLE_DEBUG information. */
2209 static void
2210 remove_conditions_and_labels (loop_p loop)
2212 gimple_stmt_iterator gsi;
2213 unsigned int i;
2215 for (i = 0; i < loop->num_nodes; i++)
2217 basic_block bb = ifc_bbs[i];
2219 if (bb_with_exit_edge_p (loop, bb)
2220 || bb == loop->latch)
2221 continue;
2223 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2224 switch (gimple_code (gsi_stmt (gsi)))
2226 case GIMPLE_COND:
2227 case GIMPLE_LABEL:
2228 gsi_remove (&gsi, true);
2229 break;
2231 case GIMPLE_DEBUG:
2232 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2233 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2235 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2236 update_stmt (gsi_stmt (gsi));
2238 gsi_next (&gsi);
2239 break;
2241 default:
2242 gsi_next (&gsi);
2247 /* Combine all the basic blocks from LOOP into one or two super basic
2248 blocks. Replace PHI nodes with conditional modify expressions. */
2250 static void
2251 combine_blocks (struct loop *loop, bool any_mask_load_store)
2253 basic_block bb, exit_bb, merge_target_bb;
2254 unsigned int orig_loop_num_nodes = loop->num_nodes;
2255 unsigned int i;
2256 edge e;
2257 edge_iterator ei;
2259 predicate_bbs (loop);
2260 remove_conditions_and_labels (loop);
2261 insert_gimplified_predicates (loop, any_mask_load_store);
2262 predicate_all_scalar_phis (loop);
2264 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2265 predicate_mem_writes (loop);
2267 /* Merge basic blocks: first remove all the edges in the loop,
2268 except for those from the exit block. */
2269 exit_bb = NULL;
2270 for (i = 0; i < orig_loop_num_nodes; i++)
2272 bb = ifc_bbs[i];
2273 free_bb_predicate (bb);
2274 if (bb_with_exit_edge_p (loop, bb))
2276 gcc_assert (exit_bb == NULL);
2277 exit_bb = bb;
2280 gcc_assert (exit_bb != loop->latch);
2282 for (i = 1; i < orig_loop_num_nodes; i++)
2284 bb = ifc_bbs[i];
2286 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2288 if (e->src == exit_bb)
2289 ei_next (&ei);
2290 else
2291 remove_edge (e);
2295 if (exit_bb != NULL)
2297 if (exit_bb != loop->header)
2299 /* Connect this node to loop header. */
2300 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2301 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2304 /* Redirect non-exit edges to loop->latch. */
2305 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2307 if (!loop_exit_edge_p (loop, e))
2308 redirect_edge_and_branch (e, loop->latch);
2310 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2312 else
2314 /* If the loop does not have an exit, reconnect header and latch. */
2315 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2316 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2319 merge_target_bb = loop->header;
2320 for (i = 1; i < orig_loop_num_nodes; i++)
2322 gimple_stmt_iterator gsi;
2323 gimple_stmt_iterator last;
2325 bb = ifc_bbs[i];
2327 if (bb == exit_bb || bb == loop->latch)
2328 continue;
2330 /* Make stmts member of loop->header. */
2331 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2332 gimple_set_bb (gsi_stmt (gsi), merge_target_bb);
2334 /* Update stmt list. */
2335 last = gsi_last_bb (merge_target_bb);
2336 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2337 set_bb_seq (bb, NULL);
2339 delete_basic_block (bb);
2342 /* If possible, merge loop header to the block with the exit edge.
2343 This reduces the number of basic blocks to two, to please the
2344 vectorizer that handles only loops with two nodes. */
2345 if (exit_bb
2346 && exit_bb != loop->header
2347 && can_merge_blocks_p (loop->header, exit_bb))
2348 merge_blocks (loop->header, exit_bb);
2350 free (ifc_bbs);
2351 ifc_bbs = NULL;
2354 /* Version LOOP before if-converting it, the original loop
2355 will be then if-converted, the new copy of the loop will not,
2356 and the LOOP_VECTORIZED internal call will be guarding which
2357 loop to execute. The vectorizer pass will fold this
2358 internal call into either true or false. */
2360 static bool
2361 version_loop_for_if_conversion (struct loop *loop)
2363 basic_block cond_bb;
2364 tree cond = make_ssa_name (boolean_type_node);
2365 struct loop *new_loop;
2366 gimple g;
2367 gimple_stmt_iterator gsi;
2369 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2370 build_int_cst (integer_type_node, loop->num),
2371 integer_zero_node);
2372 gimple_call_set_lhs (g, cond);
2374 initialize_original_copy_tables ();
2375 new_loop = loop_version (loop, cond, &cond_bb,
2376 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2377 REG_BR_PROB_BASE, true);
2378 free_original_copy_tables ();
2379 if (new_loop == NULL)
2380 return false;
2381 new_loop->dont_vectorize = true;
2382 new_loop->force_vectorize = false;
2383 gsi = gsi_last_bb (cond_bb);
2384 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2385 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2386 update_ssa (TODO_update_ssa);
2387 return true;
2390 /* Performs splitting of critical edges if aggressive_if_conv is true.
2391 Returns false if loop won't be if converted and true otherwise. */
2393 static bool
2394 ifcvt_split_critical_edges (struct loop *loop)
2396 basic_block *body;
2397 basic_block bb;
2398 unsigned int num = loop->num_nodes;
2399 unsigned int i;
2400 gimple stmt;
2401 edge e;
2402 edge_iterator ei;
2404 if (num <= 2)
2405 return false;
2406 if (loop->inner)
2407 return false;
2408 if (!single_exit (loop))
2409 return false;
2411 body = get_loop_body (loop);
2412 for (i = 0; i < num; i++)
2414 bb = body[i];
2415 if (bb == loop->latch
2416 || bb_with_exit_edge_p (loop, bb))
2417 continue;
2418 stmt = last_stmt (bb);
2419 /* Skip basic blocks not ending with conditional branch. */
2420 if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
2421 continue;
2422 FOR_EACH_EDGE (e, ei, bb->succs)
2423 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2424 split_edge (e);
2426 free (body);
2427 return true;
2430 /* Assumes that lhs of DEF_STMT have multiple uses.
2431 Delete one use by (1) creation of copy DEF_STMT with
2432 unique lhs; (2) change original use of lhs in one
2433 use statement with newly created lhs. */
2435 static void
2436 ifcvt_split_def_stmt (gimple def_stmt, gimple use_stmt)
2438 tree var;
2439 tree lhs;
2440 gimple copy_stmt;
2441 gimple_stmt_iterator gsi;
2442 use_operand_p use_p;
2443 imm_use_iterator imm_iter;
2445 var = gimple_assign_lhs (def_stmt);
2446 copy_stmt = gimple_copy (def_stmt);
2447 lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
2448 gimple_assign_set_lhs (copy_stmt, lhs);
2449 SSA_NAME_DEF_STMT (lhs) = copy_stmt;
2450 /* Insert copy of DEF_STMT. */
2451 gsi = gsi_for_stmt (def_stmt);
2452 gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
2453 /* Change use of var to lhs in use_stmt. */
2454 if (dump_file && (dump_flags & TDF_DETAILS))
2456 fprintf (dump_file, "Change use of var ");
2457 print_generic_expr (dump_file, var, TDF_SLIM);
2458 fprintf (dump_file, " to ");
2459 print_generic_expr (dump_file, lhs, TDF_SLIM);
2460 fprintf (dump_file, "\n");
2462 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2464 if (USE_STMT (use_p) != use_stmt)
2465 continue;
2466 SET_USE (use_p, lhs);
2467 break;
2471 /* Traverse bool pattern recursively starting from VAR.
2472 Save its def and use statements to defuse_list if VAR does
2473 not have single use. */
2475 static void
2476 ifcvt_walk_pattern_tree (tree var, vec<gimple> *defuse_list,
2477 gimple use_stmt)
2479 tree rhs1, rhs2;
2480 enum tree_code code;
2481 gimple def_stmt;
2483 def_stmt = SSA_NAME_DEF_STMT (var);
2484 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2485 return;
2486 if (!has_single_use (var))
2488 /* Put def and use stmts into defuse_list. */
2489 defuse_list->safe_push (def_stmt);
2490 defuse_list->safe_push (use_stmt);
2491 if (dump_file && (dump_flags & TDF_DETAILS))
2493 fprintf (dump_file, "Multiple lhs uses in stmt\n");
2494 print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
2497 rhs1 = gimple_assign_rhs1 (def_stmt);
2498 code = gimple_assign_rhs_code (def_stmt);
2499 switch (code)
2501 case SSA_NAME:
2502 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2503 break;
2504 CASE_CONVERT:
2505 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2506 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2507 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2508 break;
2509 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2510 break;
2511 case BIT_NOT_EXPR:
2512 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2513 break;
2514 case BIT_AND_EXPR:
2515 case BIT_IOR_EXPR:
2516 case BIT_XOR_EXPR:
2517 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2518 rhs2 = gimple_assign_rhs2 (def_stmt);
2519 ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
2520 break;
2521 default:
2522 break;
2524 return;
2527 /* Returns true if STMT can be a root of bool pattern apllied
2528 by vectorizer. */
2530 static bool
2531 stmt_is_root_of_bool_pattern (gimple stmt)
2533 enum tree_code code;
2534 tree lhs, rhs;
2536 code = gimple_assign_rhs_code (stmt);
2537 if (CONVERT_EXPR_CODE_P (code))
2539 lhs = gimple_assign_lhs (stmt);
2540 rhs = gimple_assign_rhs1 (stmt);
2541 if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2542 return false;
2543 if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
2544 return false;
2545 return true;
2547 else if (code == COND_EXPR)
2549 rhs = gimple_assign_rhs1 (stmt);
2550 if (TREE_CODE (rhs) != SSA_NAME)
2551 return false;
2552 return true;
2554 return false;
2557 /* Traverse all statements in BB which correspondent to loop header to
2558 find out all statements which can start bool pattern applied by
2559 vectorizer and convert multiple uses in it to conform pattern
2560 restrictions. Such case can occur if the same predicate is used both
2561 for phi node conversion and load/store mask. */
2563 static void
2564 ifcvt_repair_bool_pattern (basic_block bb)
2566 tree rhs;
2567 gimple stmt;
2568 gimple_stmt_iterator gsi;
2569 vec<gimple> defuse_list = vNULL;
2570 vec<gimple> pattern_roots = vNULL;
2571 bool repeat = true;
2572 int niter = 0;
2573 unsigned int ix;
2575 /* Collect all root pattern statements. */
2576 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2578 stmt = gsi_stmt (gsi);
2579 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2580 continue;
2581 if (!stmt_is_root_of_bool_pattern (stmt))
2582 continue;
2583 pattern_roots.safe_push (stmt);
2586 if (pattern_roots.is_empty ())
2587 return;
2589 /* Split all statements with multiple uses iteratively since splitting
2590 may create new multiple uses. */
2591 while (repeat)
2593 repeat = false;
2594 niter++;
2595 FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
2597 rhs = gimple_assign_rhs1 (stmt);
2598 ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
2599 while (defuse_list.length () > 0)
2601 repeat = true;
2602 gimple def_stmt, use_stmt;
2603 use_stmt = defuse_list.pop ();
2604 def_stmt = defuse_list.pop ();
2605 ifcvt_split_def_stmt (def_stmt, use_stmt);
2610 if (dump_file && (dump_flags & TDF_DETAILS))
2611 fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
2612 niter);
2615 /* Delete redundant statements produced by predication which prevents
2616 loop vectorization. */
2618 static void
2619 ifcvt_local_dce (basic_block bb)
2621 gimple stmt;
2622 gimple stmt1;
2623 gimple phi;
2624 gimple_stmt_iterator gsi;
2625 vec<gimple> worklist;
2626 enum gimple_code code;
2627 use_operand_p use_p;
2628 imm_use_iterator imm_iter;
2630 worklist.create (64);
2631 /* Consider all phi as live statements. */
2632 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2634 phi = gsi_stmt (gsi);
2635 gimple_set_plf (phi, GF_PLF_2, true);
2636 worklist.safe_push (phi);
2638 /* Consider load/store statemnts, CALL and COND as live. */
2639 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2641 stmt = gsi_stmt (gsi);
2642 if (gimple_store_p (stmt)
2643 || gimple_assign_load_p (stmt)
2644 || is_gimple_debug (stmt))
2646 gimple_set_plf (stmt, GF_PLF_2, true);
2647 worklist.safe_push (stmt);
2648 continue;
2650 code = gimple_code (stmt);
2651 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2653 gimple_set_plf (stmt, GF_PLF_2, true);
2654 worklist.safe_push (stmt);
2655 continue;
2657 gimple_set_plf (stmt, GF_PLF_2, false);
2659 if (code == GIMPLE_ASSIGN)
2661 tree lhs = gimple_assign_lhs (stmt);
2662 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2664 stmt1 = USE_STMT (use_p);
2665 if (gimple_bb (stmt1) != bb)
2667 gimple_set_plf (stmt, GF_PLF_2, true);
2668 worklist.safe_push (stmt);
2669 break;
2674 /* Propagate liveness through arguments of live stmt. */
2675 while (worklist.length () > 0)
2677 ssa_op_iter iter;
2678 use_operand_p use_p;
2679 tree use;
2681 stmt = worklist.pop ();
2682 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2684 use = USE_FROM_PTR (use_p);
2685 if (TREE_CODE (use) != SSA_NAME)
2686 continue;
2687 stmt1 = SSA_NAME_DEF_STMT (use);
2688 if (gimple_bb (stmt1) != bb
2689 || gimple_plf (stmt1, GF_PLF_2))
2690 continue;
2691 gimple_set_plf (stmt1, GF_PLF_2, true);
2692 worklist.safe_push (stmt1);
2695 /* Delete dead statements. */
2696 gsi = gsi_start_bb (bb);
2697 while (!gsi_end_p (gsi))
2699 stmt = gsi_stmt (gsi);
2700 if (gimple_plf (stmt, GF_PLF_2))
2702 gsi_next (&gsi);
2703 continue;
2705 if (dump_file && (dump_flags & TDF_DETAILS))
2707 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2708 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2710 gsi_remove (&gsi, true);
2711 release_defs (stmt);
2715 /* If-convert LOOP when it is legal. For the moment this pass has no
2716 profitability analysis. Returns non-zero todo flags when something
2717 changed. */
2719 static unsigned int
2720 tree_if_conversion (struct loop *loop)
2722 unsigned int todo = 0;
2723 ifc_bbs = NULL;
2724 bool any_mask_load_store = false;
2726 /* Set-up aggressive if-conversion for loops marked with simd pragma. */
2727 aggressive_if_conv = loop->force_vectorize;
2728 /* Check either outer loop was marked with simd pragma. */
2729 if (!aggressive_if_conv)
2731 struct loop *outer_loop = loop_outer (loop);
2732 if (outer_loop && outer_loop->force_vectorize)
2733 aggressive_if_conv = true;
2736 if (aggressive_if_conv)
2737 if (!ifcvt_split_critical_edges (loop))
2738 goto cleanup;
2740 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2741 || !dbg_cnt (if_conversion_tree))
2742 goto cleanup;
2744 if (any_mask_load_store
2745 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2746 || loop->dont_vectorize))
2747 goto cleanup;
2749 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2750 goto cleanup;
2752 /* Now all statements are if-convertible. Combine all the basic
2753 blocks into one huge basic block doing the if-conversion
2754 on-the-fly. */
2755 combine_blocks (loop, any_mask_load_store);
2757 /* Delete dead predicate computations and repair tree correspondent
2758 to bool pattern to delete multiple uses of preidcates. */
2759 if (aggressive_if_conv)
2761 ifcvt_local_dce (loop->header);
2762 ifcvt_repair_bool_pattern (loop->header);
2765 todo |= TODO_cleanup_cfg;
2766 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2768 mark_virtual_operands_for_renaming (cfun);
2769 todo |= TODO_update_ssa_only_virtuals;
2772 cleanup:
2773 if (ifc_bbs)
2775 unsigned int i;
2777 for (i = 0; i < loop->num_nodes; i++)
2778 free_bb_predicate (ifc_bbs[i]);
2780 free (ifc_bbs);
2781 ifc_bbs = NULL;
2783 free_dominance_info (CDI_POST_DOMINATORS);
2785 return todo;
2788 /* Tree if-conversion pass management. */
2790 namespace {
2792 const pass_data pass_data_if_conversion =
2794 GIMPLE_PASS, /* type */
2795 "ifcvt", /* name */
2796 OPTGROUP_NONE, /* optinfo_flags */
2797 TV_NONE, /* tv_id */
2798 ( PROP_cfg | PROP_ssa ), /* properties_required */
2799 0, /* properties_provided */
2800 0, /* properties_destroyed */
2801 0, /* todo_flags_start */
2802 0, /* todo_flags_finish */
2805 class pass_if_conversion : public gimple_opt_pass
2807 public:
2808 pass_if_conversion (gcc::context *ctxt)
2809 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2812 /* opt_pass methods: */
2813 virtual bool gate (function *);
2814 virtual unsigned int execute (function *);
2816 }; // class pass_if_conversion
2818 bool
2819 pass_if_conversion::gate (function *fun)
2821 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2822 && flag_tree_loop_if_convert != 0)
2823 || flag_tree_loop_if_convert == 1
2824 || flag_tree_loop_if_convert_stores == 1);
2827 unsigned int
2828 pass_if_conversion::execute (function *fun)
2830 struct loop *loop;
2831 unsigned todo = 0;
2833 if (number_of_loops (fun) <= 1)
2834 return 0;
2836 FOR_EACH_LOOP (loop, 0)
2837 if (flag_tree_loop_if_convert == 1
2838 || flag_tree_loop_if_convert_stores == 1
2839 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2840 && !loop->dont_vectorize))
2841 todo |= tree_if_conversion (loop);
2843 #ifdef ENABLE_CHECKING
2845 basic_block bb;
2846 FOR_EACH_BB_FN (bb, fun)
2847 gcc_assert (!bb->aux);
2849 #endif
2851 return todo;
2854 } // anon namespace
2856 gimple_opt_pass *
2857 make_pass_if_conversion (gcc::context *ctxt)
2859 return new pass_if_conversion (ctxt);