Daily bump.
[official-gcc.git] / gcc / tree-if-conv.c
blobe88ddc9f7882abbbb60a84761ce597324a0c7c4f
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2021 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "cfganal.h"
118 #include "internal-fn.h"
119 #include "fold-const.h"
120 #include "tree-ssa-sccvn.h"
121 #include "tree-cfgcleanup.h"
122 #include "tree-ssa-dse.h"
123 #include "tree-vectorizer.h"
125 /* Only handle PHIs with no more arguments unless we are asked to by
126 simd pragma. */
127 #define MAX_PHI_ARG_NUM \
128 ((unsigned) param_max_tree_if_conversion_phi_args)
130 /* True if we've converted a statement that was only executed when some
131 condition C was true, and if for correctness we need to predicate the
132 statement to ensure that it is a no-op when C is false. See
133 predicate_statements for the kinds of predication we support. */
134 static bool need_to_predicate;
136 /* True if we have to rewrite stmts that may invoke undefined behavior
137 when a condition C was false so it doesn't if it is always executed.
138 See predicate_statements for the kinds of predication we support. */
139 static bool need_to_rewrite_undefined;
141 /* Indicate if there are any complicated PHIs that need to be handled in
142 if-conversion. Complicated PHI has more than two arguments and can't
143 be degenerated to two arguments PHI. See more information in comment
144 before phi_convertible_by_degenerating_args. */
145 static bool any_complicated_phi;
147 /* Hash for struct innermost_loop_behavior. It depends on the user to
148 free the memory. */
150 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
152 static inline hashval_t hash (const value_type &);
153 static inline bool equal (const value_type &,
154 const compare_type &);
157 inline hashval_t
158 innermost_loop_behavior_hash::hash (const value_type &e)
160 hashval_t hash;
162 hash = iterative_hash_expr (e->base_address, 0);
163 hash = iterative_hash_expr (e->offset, hash);
164 hash = iterative_hash_expr (e->init, hash);
165 return iterative_hash_expr (e->step, hash);
168 inline bool
169 innermost_loop_behavior_hash::equal (const value_type &e1,
170 const compare_type &e2)
172 if ((e1->base_address && !e2->base_address)
173 || (!e1->base_address && e2->base_address)
174 || (!e1->offset && e2->offset)
175 || (e1->offset && !e2->offset)
176 || (!e1->init && e2->init)
177 || (e1->init && !e2->init)
178 || (!e1->step && e2->step)
179 || (e1->step && !e2->step))
180 return false;
182 if (e1->base_address && e2->base_address
183 && !operand_equal_p (e1->base_address, e2->base_address, 0))
184 return false;
185 if (e1->offset && e2->offset
186 && !operand_equal_p (e1->offset, e2->offset, 0))
187 return false;
188 if (e1->init && e2->init
189 && !operand_equal_p (e1->init, e2->init, 0))
190 return false;
191 if (e1->step && e2->step
192 && !operand_equal_p (e1->step, e2->step, 0))
193 return false;
195 return true;
198 /* List of basic blocks in if-conversion-suitable order. */
199 static basic_block *ifc_bbs;
201 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
202 static hash_map<innermost_loop_behavior_hash,
203 data_reference_p> *innermost_DR_map;
205 /* Hash table to store <base reference, DR> pairs. */
206 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
208 /* List of redundant SSA names: the first should be replaced by the second. */
209 static vec< std::pair<tree, tree> > redundant_ssa_names;
211 /* Structure used to predicate basic blocks. This is attached to the
212 ->aux field of the BBs in the loop to be if-converted. */
213 struct bb_predicate {
215 /* The condition under which this basic block is executed. */
216 tree predicate;
218 /* PREDICATE is gimplified, and the sequence of statements is
219 recorded here, in order to avoid the duplication of computations
220 that occur in previous conditions. See PR44483. */
221 gimple_seq predicate_gimplified_stmts;
224 /* Returns true when the basic block BB has a predicate. */
226 static inline bool
227 bb_has_predicate (basic_block bb)
229 return bb->aux != NULL;
232 /* Returns the gimplified predicate for basic block BB. */
234 static inline tree
235 bb_predicate (basic_block bb)
237 return ((struct bb_predicate *) bb->aux)->predicate;
240 /* Sets the gimplified predicate COND for basic block BB. */
242 static inline void
243 set_bb_predicate (basic_block bb, tree cond)
245 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
246 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
247 || is_gimple_condexpr (cond));
248 ((struct bb_predicate *) bb->aux)->predicate = cond;
251 /* Returns the sequence of statements of the gimplification of the
252 predicate for basic block BB. */
254 static inline gimple_seq
255 bb_predicate_gimplified_stmts (basic_block bb)
257 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
260 /* Sets the sequence of statements STMTS of the gimplification of the
261 predicate for basic block BB. */
263 static inline void
264 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
266 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
269 /* Adds the sequence of statements STMTS to the sequence of statements
270 of the predicate for basic block BB. */
272 static inline void
273 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
275 /* We might have updated some stmts in STMTS via force_gimple_operand
276 calling fold_stmt and that producing multiple stmts. Delink immediate
277 uses so update_ssa after loop versioning doesn't get confused for
278 the not yet inserted predicates.
279 ??? This should go away once we reliably avoid updating stmts
280 not in any BB. */
281 for (gimple_stmt_iterator gsi = gsi_start (stmts);
282 !gsi_end_p (gsi); gsi_next (&gsi))
284 gimple *stmt = gsi_stmt (gsi);
285 delink_stmt_imm_use (stmt);
286 gimple_set_modified (stmt, true);
288 gimple_seq_add_seq_without_update
289 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
292 /* Initializes to TRUE the predicate of basic block BB. */
294 static inline void
295 init_bb_predicate (basic_block bb)
297 bb->aux = XNEW (struct bb_predicate);
298 set_bb_predicate_gimplified_stmts (bb, NULL);
299 set_bb_predicate (bb, boolean_true_node);
302 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
304 static inline void
305 release_bb_predicate (basic_block bb)
307 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
308 if (stmts)
310 /* Ensure that these stmts haven't yet been added to a bb. */
311 if (flag_checking)
312 for (gimple_stmt_iterator i = gsi_start (stmts);
313 !gsi_end_p (i); gsi_next (&i))
314 gcc_assert (! gimple_bb (gsi_stmt (i)));
316 /* Discard them. */
317 gimple_seq_discard (stmts);
318 set_bb_predicate_gimplified_stmts (bb, NULL);
322 /* Free the predicate of basic block BB. */
324 static inline void
325 free_bb_predicate (basic_block bb)
327 if (!bb_has_predicate (bb))
328 return;
330 release_bb_predicate (bb);
331 free (bb->aux);
332 bb->aux = NULL;
335 /* Reinitialize predicate of BB with the true predicate. */
337 static inline void
338 reset_bb_predicate (basic_block bb)
340 if (!bb_has_predicate (bb))
341 init_bb_predicate (bb);
342 else
344 release_bb_predicate (bb);
345 set_bb_predicate (bb, boolean_true_node);
349 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
350 the expression EXPR. Inserts the statement created for this
351 computation before GSI and leaves the iterator GSI at the same
352 statement. */
354 static tree
355 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
357 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
358 gimple *stmt = gimple_build_assign (new_name, expr);
359 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
360 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
361 return new_name;
364 /* Return true when COND is a false predicate. */
366 static inline bool
367 is_false_predicate (tree cond)
369 return (cond != NULL_TREE
370 && (cond == boolean_false_node
371 || integer_zerop (cond)));
374 /* Return true when COND is a true predicate. */
376 static inline bool
377 is_true_predicate (tree cond)
379 return (cond == NULL_TREE
380 || cond == boolean_true_node
381 || integer_onep (cond));
384 /* Returns true when BB has a predicate that is not trivial: true or
385 NULL_TREE. */
387 static inline bool
388 is_predicated (basic_block bb)
390 return !is_true_predicate (bb_predicate (bb));
393 /* Parses the predicate COND and returns its comparison code and
394 operands OP0 and OP1. */
396 static enum tree_code
397 parse_predicate (tree cond, tree *op0, tree *op1)
399 gimple *s;
401 if (TREE_CODE (cond) == SSA_NAME
402 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
404 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
406 *op0 = gimple_assign_rhs1 (s);
407 *op1 = gimple_assign_rhs2 (s);
408 return gimple_assign_rhs_code (s);
411 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
413 tree op = gimple_assign_rhs1 (s);
414 tree type = TREE_TYPE (op);
415 enum tree_code code = parse_predicate (op, op0, op1);
417 return code == ERROR_MARK ? ERROR_MARK
418 : invert_tree_comparison (code, HONOR_NANS (type));
421 return ERROR_MARK;
424 if (COMPARISON_CLASS_P (cond))
426 *op0 = TREE_OPERAND (cond, 0);
427 *op1 = TREE_OPERAND (cond, 1);
428 return TREE_CODE (cond);
431 return ERROR_MARK;
434 /* Returns the fold of predicate C1 OR C2 at location LOC. */
436 static tree
437 fold_or_predicates (location_t loc, tree c1, tree c2)
439 tree op1a, op1b, op2a, op2b;
440 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
441 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
443 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
445 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
446 code2, op2a, op2b);
447 if (t)
448 return t;
451 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
454 /* Returns either a COND_EXPR or the folded expression if the folded
455 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
456 a constant or a SSA_NAME. */
458 static tree
459 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
461 tree rhs1, lhs1, cond_expr;
463 /* If COND is comparison r != 0 and r has boolean type, convert COND
464 to SSA_NAME to accept by vect bool pattern. */
465 if (TREE_CODE (cond) == NE_EXPR)
467 tree op0 = TREE_OPERAND (cond, 0);
468 tree op1 = TREE_OPERAND (cond, 1);
469 if (TREE_CODE (op0) == SSA_NAME
470 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
471 && (integer_zerop (op1)))
472 cond = op0;
474 cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
476 if (cond_expr == NULL_TREE)
477 return build3 (COND_EXPR, type, cond, rhs, lhs);
479 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
481 if (is_gimple_val (cond_expr))
482 return cond_expr;
484 if (TREE_CODE (cond_expr) == ABS_EXPR)
486 rhs1 = TREE_OPERAND (cond_expr, 1);
487 STRIP_USELESS_TYPE_CONVERSION (rhs1);
488 if (is_gimple_val (rhs1))
489 return build1 (ABS_EXPR, type, rhs1);
492 if (TREE_CODE (cond_expr) == MIN_EXPR
493 || TREE_CODE (cond_expr) == MAX_EXPR)
495 lhs1 = TREE_OPERAND (cond_expr, 0);
496 STRIP_USELESS_TYPE_CONVERSION (lhs1);
497 rhs1 = TREE_OPERAND (cond_expr, 1);
498 STRIP_USELESS_TYPE_CONVERSION (rhs1);
499 if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
500 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
502 return build3 (COND_EXPR, type, cond, rhs, lhs);
505 /* Add condition NC to the predicate list of basic block BB. LOOP is
506 the loop to be if-converted. Use predicate of cd-equivalent block
507 for join bb if it exists: we call basic blocks bb1 and bb2
508 cd-equivalent if they are executed under the same condition. */
510 static inline void
511 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
513 tree bc, *tp;
514 basic_block dom_bb;
516 if (is_true_predicate (nc))
517 return;
519 /* If dominance tells us this basic block is always executed,
520 don't record any predicates for it. */
521 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
522 return;
524 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
525 /* We use notion of cd equivalence to get simpler predicate for
526 join block, e.g. if join block has 2 predecessors with predicates
527 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
528 p1 & p2 | p1 & !p2. */
529 if (dom_bb != loop->header
530 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
532 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
533 bc = bb_predicate (dom_bb);
534 if (!is_true_predicate (bc))
535 set_bb_predicate (bb, bc);
536 else
537 gcc_assert (is_true_predicate (bb_predicate (bb)));
538 if (dump_file && (dump_flags & TDF_DETAILS))
539 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
540 dom_bb->index, bb->index);
541 return;
544 if (!is_predicated (bb))
545 bc = nc;
546 else
548 bc = bb_predicate (bb);
549 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
550 if (is_true_predicate (bc))
552 reset_bb_predicate (bb);
553 return;
557 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
558 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
559 tp = &TREE_OPERAND (bc, 0);
560 else
561 tp = &bc;
562 if (!is_gimple_condexpr (*tp))
564 gimple_seq stmts;
565 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
566 add_bb_predicate_gimplified_stmts (bb, stmts);
568 set_bb_predicate (bb, bc);
571 /* Add the condition COND to the previous condition PREV_COND, and add
572 this to the predicate list of the destination of edge E. LOOP is
573 the loop to be if-converted. */
575 static void
576 add_to_dst_predicate_list (class loop *loop, edge e,
577 tree prev_cond, tree cond)
579 if (!flow_bb_inside_loop_p (loop, e->dest))
580 return;
582 if (!is_true_predicate (prev_cond))
583 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
584 prev_cond, cond);
586 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
587 add_to_predicate_list (loop, e->dest, cond);
590 /* Return true if one of the successor edges of BB exits LOOP. */
592 static bool
593 bb_with_exit_edge_p (class loop *loop, basic_block bb)
595 edge e;
596 edge_iterator ei;
598 FOR_EACH_EDGE (e, ei, bb->succs)
599 if (loop_exit_edge_p (loop, e))
600 return true;
602 return false;
605 /* Given PHI which has more than two arguments, this function checks if
606 it's if-convertible by degenerating its arguments. Specifically, if
607 below two conditions are satisfied:
609 1) Number of PHI arguments with different values equals to 2 and one
610 argument has the only occurrence.
611 2) The edge corresponding to the unique argument isn't critical edge.
613 Such PHI can be handled as PHIs have only two arguments. For example,
614 below PHI:
616 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
618 can be transformed into:
620 res = (predicate of e3) ? A_2 : A_1;
622 Return TRUE if it is the case, FALSE otherwise. */
624 static bool
625 phi_convertible_by_degenerating_args (gphi *phi)
627 edge e;
628 tree arg, t1 = NULL, t2 = NULL;
629 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
630 unsigned int num_args = gimple_phi_num_args (phi);
632 gcc_assert (num_args > 2);
634 for (i = 0; i < num_args; i++)
636 arg = gimple_phi_arg_def (phi, i);
637 if (t1 == NULL || operand_equal_p (t1, arg, 0))
639 n1++;
640 i1 = i;
641 t1 = arg;
643 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
645 n2++;
646 i2 = i;
647 t2 = arg;
649 else
650 return false;
653 if (n1 != 1 && n2 != 1)
654 return false;
656 /* Check if the edge corresponding to the unique arg is critical. */
657 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
658 if (EDGE_COUNT (e->src->succs) > 1)
659 return false;
661 return true;
664 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
665 and it belongs to basic block BB. Note at this point, it is sure
666 that PHI is if-convertible. This function updates global variable
667 ANY_COMPLICATED_PHI if PHI is complicated. */
669 static bool
670 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
672 if (dump_file && (dump_flags & TDF_DETAILS))
674 fprintf (dump_file, "-------------------------\n");
675 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
678 if (bb != loop->header
679 && gimple_phi_num_args (phi) > 2
680 && !phi_convertible_by_degenerating_args (phi))
681 any_complicated_phi = true;
683 return true;
686 /* Records the status of a data reference. This struct is attached to
687 each DR->aux field. */
689 struct ifc_dr {
690 bool rw_unconditionally;
691 bool w_unconditionally;
692 bool written_at_least_once;
694 tree rw_predicate;
695 tree w_predicate;
696 tree base_w_predicate;
699 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
700 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
701 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
702 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
704 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
705 HASH tables. While storing them in HASH table, it checks if the
706 reference is unconditionally read or written and stores that as a flag
707 information. For base reference it checks if it is written atlest once
708 unconditionally and stores it as flag information along with DR.
709 In other words for every data reference A in STMT there exist other
710 accesses to a data reference with the same base with predicates that
711 add up (OR-up) to the true predicate: this ensures that the data
712 reference A is touched (read or written) on every iteration of the
713 if-converted loop. */
714 static void
715 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
718 data_reference_p *master_dr, *base_master_dr;
719 tree base_ref = DR_BASE_OBJECT (a);
720 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
721 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
722 bool exist1, exist2;
724 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
725 if (!exist1)
726 *master_dr = a;
728 if (DR_IS_WRITE (a))
730 IFC_DR (*master_dr)->w_predicate
731 = fold_or_predicates (UNKNOWN_LOCATION, ca,
732 IFC_DR (*master_dr)->w_predicate);
733 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
734 DR_W_UNCONDITIONALLY (*master_dr) = true;
736 IFC_DR (*master_dr)->rw_predicate
737 = fold_or_predicates (UNKNOWN_LOCATION, ca,
738 IFC_DR (*master_dr)->rw_predicate);
739 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
740 DR_RW_UNCONDITIONALLY (*master_dr) = true;
742 if (DR_IS_WRITE (a))
744 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
745 if (!exist2)
746 *base_master_dr = a;
747 IFC_DR (*base_master_dr)->base_w_predicate
748 = fold_or_predicates (UNKNOWN_LOCATION, ca,
749 IFC_DR (*base_master_dr)->base_w_predicate);
750 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
751 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
755 /* Return TRUE if can prove the index IDX of an array reference REF is
756 within array bound. Return false otherwise. */
758 static bool
759 idx_within_array_bound (tree ref, tree *idx, void *dta)
761 wi::overflow_type overflow;
762 widest_int niter, valid_niter, delta, wi_step;
763 tree ev, init, step;
764 tree low, high;
765 class loop *loop = (class loop*) dta;
767 /* Only support within-bound access for array references. */
768 if (TREE_CODE (ref) != ARRAY_REF)
769 return false;
771 /* For arrays at the end of the structure, we are not guaranteed that they
772 do not really extend over their declared size. However, for arrays of
773 size greater than one, this is unlikely to be intended. */
774 if (array_at_struct_end_p (ref))
775 return false;
777 ev = analyze_scalar_evolution (loop, *idx);
778 ev = instantiate_parameters (loop, ev);
779 init = initial_condition (ev);
780 step = evolution_part_in_loop_num (ev, loop->num);
782 if (!init || TREE_CODE (init) != INTEGER_CST
783 || (step && TREE_CODE (step) != INTEGER_CST))
784 return false;
786 low = array_ref_low_bound (ref);
787 high = array_ref_up_bound (ref);
789 /* The case of nonconstant bounds could be handled, but it would be
790 complicated. */
791 if (TREE_CODE (low) != INTEGER_CST
792 || !high || TREE_CODE (high) != INTEGER_CST)
793 return false;
795 /* Check if the intial idx is within bound. */
796 if (wi::to_widest (init) < wi::to_widest (low)
797 || wi::to_widest (init) > wi::to_widest (high))
798 return false;
800 /* The idx is always within bound. */
801 if (!step || integer_zerop (step))
802 return true;
804 if (!max_loop_iterations (loop, &niter))
805 return false;
807 if (wi::to_widest (step) < 0)
809 delta = wi::to_widest (init) - wi::to_widest (low);
810 wi_step = -wi::to_widest (step);
812 else
814 delta = wi::to_widest (high) - wi::to_widest (init);
815 wi_step = wi::to_widest (step);
818 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
819 /* The iteration space of idx is within array bound. */
820 if (!overflow && niter <= valid_niter)
821 return true;
823 return false;
826 /* Return TRUE if ref is a within bound array reference. */
828 static bool
829 ref_within_array_bound (gimple *stmt, tree ref)
831 class loop *loop = loop_containing_stmt (stmt);
833 gcc_assert (loop != NULL);
834 return for_each_index (&ref, idx_within_array_bound, loop);
838 /* Given a memory reference expression T, return TRUE if base object
839 it refers to is writable. The base object of a memory reference
840 is the main object being referenced, which is returned by function
841 get_base_address. */
843 static bool
844 base_object_writable (tree ref)
846 tree base_tree = get_base_address (ref);
848 return (base_tree
849 && DECL_P (base_tree)
850 && decl_binds_to_current_def_p (base_tree)
851 && !TREE_READONLY (base_tree));
854 /* Return true when the memory references of STMT won't trap in the
855 if-converted code. There are two things that we have to check for:
857 - writes to memory occur to writable memory: if-conversion of
858 memory writes transforms the conditional memory writes into
859 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
860 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
861 be executed at all in the original code, it may be a readonly
862 memory. To check that A is not const-qualified, we check that
863 there exists at least an unconditional write to A in the current
864 function.
866 - reads or writes to memory are valid memory accesses for every
867 iteration. To check that the memory accesses are correctly formed
868 and that we are allowed to read and write in these locations, we
869 check that the memory accesses to be if-converted occur at every
870 iteration unconditionally.
872 Returns true for the memory reference in STMT, same memory reference
873 is read or written unconditionally atleast once and the base memory
874 reference is written unconditionally once. This is to check reference
875 will not write fault. Also retuns true if the memory reference is
876 unconditionally read once then we are conditionally writing to memory
877 which is defined as read and write and is bound to the definition
878 we are seeing. */
879 static bool
880 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
882 /* If DR didn't see a reference here we can't use it to tell
883 whether the ref traps or not. */
884 if (gimple_uid (stmt) == 0)
885 return false;
887 data_reference_p *master_dr, *base_master_dr;
888 data_reference_p a = drs[gimple_uid (stmt) - 1];
890 tree base = DR_BASE_OBJECT (a);
891 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
893 gcc_assert (DR_STMT (a) == stmt);
894 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
895 || DR_INIT (a) || DR_STEP (a));
897 master_dr = innermost_DR_map->get (innermost);
898 gcc_assert (master_dr != NULL);
900 base_master_dr = baseref_DR_map->get (base);
902 /* If a is unconditionally written to it doesn't trap. */
903 if (DR_W_UNCONDITIONALLY (*master_dr))
904 return true;
906 /* If a is unconditionally accessed then ...
908 Even a is conditional access, we can treat it as an unconditional
909 one if it's an array reference and all its index are within array
910 bound. */
911 if (DR_RW_UNCONDITIONALLY (*master_dr)
912 || ref_within_array_bound (stmt, DR_REF (a)))
914 /* an unconditional read won't trap. */
915 if (DR_IS_READ (a))
916 return true;
918 /* an unconditionaly write won't trap if the base is written
919 to unconditionally. */
920 if (base_master_dr
921 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
922 return flag_store_data_races;
923 /* or the base is known to be not readonly. */
924 else if (base_object_writable (DR_REF (a)))
925 return flag_store_data_races;
928 return false;
931 /* Return true if STMT could be converted into a masked load or store
932 (conditional load or store based on a mask computed from bb predicate). */
934 static bool
935 ifcvt_can_use_mask_load_store (gimple *stmt)
937 /* Check whether this is a load or store. */
938 tree lhs = gimple_assign_lhs (stmt);
939 bool is_load;
940 tree ref;
941 if (gimple_store_p (stmt))
943 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
944 return false;
945 is_load = false;
946 ref = lhs;
948 else if (gimple_assign_load_p (stmt))
950 is_load = true;
951 ref = gimple_assign_rhs1 (stmt);
953 else
954 return false;
956 if (may_be_nonaddressable_p (ref))
957 return false;
959 /* Mask should be integer mode of the same size as the load/store
960 mode. */
961 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
962 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
963 return false;
965 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
966 return true;
968 return false;
971 /* Return true if STMT could be converted from an operation that is
972 unconditional to one that is conditional on a bb predicate mask. */
974 static bool
975 ifcvt_can_predicate (gimple *stmt)
977 basic_block bb = gimple_bb (stmt);
979 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
980 || bb->loop_father->dont_vectorize
981 || gimple_has_volatile_ops (stmt))
982 return false;
984 if (gimple_assign_single_p (stmt))
985 return ifcvt_can_use_mask_load_store (stmt);
987 tree_code code = gimple_assign_rhs_code (stmt);
988 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
989 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
990 if (!types_compatible_p (lhs_type, rhs_type))
991 return false;
992 internal_fn cond_fn = get_conditional_internal_fn (code);
993 return (cond_fn != IFN_LAST
994 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
997 /* Return true when STMT is if-convertible.
999 GIMPLE_ASSIGN statement is not if-convertible if,
1000 - it is not movable,
1001 - it could trap,
1002 - LHS is not var decl. */
1004 static bool
1005 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1006 vec<data_reference_p> refs)
1008 tree lhs = gimple_assign_lhs (stmt);
1010 if (dump_file && (dump_flags & TDF_DETAILS))
1012 fprintf (dump_file, "-------------------------\n");
1013 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1016 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1017 return false;
1019 /* Some of these constrains might be too conservative. */
1020 if (stmt_ends_bb_p (stmt)
1021 || gimple_has_volatile_ops (stmt)
1022 || (TREE_CODE (lhs) == SSA_NAME
1023 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1024 || gimple_has_side_effects (stmt))
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1028 return false;
1031 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
1032 in between if_convertible_loop_p and combine_blocks
1033 we can perform loop versioning. */
1034 gimple_set_plf (stmt, GF_PLF_2, false);
1036 if ((! gimple_vuse (stmt)
1037 || gimple_could_trap_p_1 (stmt, false, false)
1038 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1039 && gimple_could_trap_p (stmt))
1041 if (ifcvt_can_predicate (stmt))
1043 gimple_set_plf (stmt, GF_PLF_2, true);
1044 need_to_predicate = true;
1045 return true;
1047 if (dump_file && (dump_flags & TDF_DETAILS))
1048 fprintf (dump_file, "tree could trap...\n");
1049 return false;
1051 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1052 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1053 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1054 && arith_code_with_undefined_signed_overflow
1055 (gimple_assign_rhs_code (stmt)))
1056 /* We have to rewrite stmts with undefined overflow. */
1057 need_to_rewrite_undefined = true;
1059 /* When if-converting stores force versioning, likewise if we
1060 ended up generating store data races. */
1061 if (gimple_vdef (stmt))
1062 need_to_predicate = true;
1064 return true;
1067 /* Return true when STMT is if-convertible.
1069 A statement is if-convertible if:
1070 - it is an if-convertible GIMPLE_ASSIGN,
1071 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1072 - it is builtins call. */
1074 static bool
1075 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1077 switch (gimple_code (stmt))
1079 case GIMPLE_LABEL:
1080 case GIMPLE_DEBUG:
1081 case GIMPLE_COND:
1082 return true;
1084 case GIMPLE_ASSIGN:
1085 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1087 case GIMPLE_CALL:
1089 tree fndecl = gimple_call_fndecl (stmt);
1090 if (fndecl)
1092 int flags = gimple_call_flags (stmt);
1093 if ((flags & ECF_CONST)
1094 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1095 /* We can only vectorize some builtins at the moment,
1096 so restrict if-conversion to those. */
1097 && fndecl_built_in_p (fndecl))
1098 return true;
1100 return false;
1103 default:
1104 /* Don't know what to do with 'em so don't do anything. */
1105 if (dump_file && (dump_flags & TDF_DETAILS))
1107 fprintf (dump_file, "don't know what to do\n");
1108 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1110 return false;
1113 return true;
1116 /* Assumes that BB has more than 1 predecessors.
1117 Returns false if at least one successor is not on critical edge
1118 and true otherwise. */
1120 static inline bool
1121 all_preds_critical_p (basic_block bb)
1123 edge e;
1124 edge_iterator ei;
1126 FOR_EACH_EDGE (e, ei, bb->preds)
1127 if (EDGE_COUNT (e->src->succs) == 1)
1128 return false;
1129 return true;
1132 /* Return true when BB is if-convertible. This routine does not check
1133 basic block's statements and phis.
1135 A basic block is not if-convertible if:
1136 - it is non-empty and it is after the exit block (in BFS order),
1137 - it is after the exit block but before the latch,
1138 - its edges are not normal.
1140 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1141 inside LOOP. */
1143 static bool
1144 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1146 edge e;
1147 edge_iterator ei;
1149 if (dump_file && (dump_flags & TDF_DETAILS))
1150 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1152 if (EDGE_COUNT (bb->succs) > 2)
1153 return false;
1155 gimple *last = last_stmt (bb);
1156 if (gcall *call = safe_dyn_cast <gcall *> (last))
1157 if (gimple_call_ctrl_altering_p (call))
1158 return false;
1160 if (exit_bb)
1162 if (bb != loop->latch)
1164 if (dump_file && (dump_flags & TDF_DETAILS))
1165 fprintf (dump_file, "basic block after exit bb but before latch\n");
1166 return false;
1168 else if (!empty_block_p (bb))
1170 if (dump_file && (dump_flags & TDF_DETAILS))
1171 fprintf (dump_file, "non empty basic block after exit bb\n");
1172 return false;
1174 else if (bb == loop->latch
1175 && bb != exit_bb
1176 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1178 if (dump_file && (dump_flags & TDF_DETAILS))
1179 fprintf (dump_file, "latch is not dominated by exit_block\n");
1180 return false;
1184 /* Be less adventurous and handle only normal edges. */
1185 FOR_EACH_EDGE (e, ei, bb->succs)
1186 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1188 if (dump_file && (dump_flags & TDF_DETAILS))
1189 fprintf (dump_file, "Difficult to handle edges\n");
1190 return false;
1193 return true;
1196 /* Return true when all predecessor blocks of BB are visited. The
1197 VISITED bitmap keeps track of the visited blocks. */
1199 static bool
1200 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1202 edge e;
1203 edge_iterator ei;
1204 FOR_EACH_EDGE (e, ei, bb->preds)
1205 if (!bitmap_bit_p (*visited, e->src->index))
1206 return false;
1208 return true;
1211 /* Get body of a LOOP in suitable order for if-conversion. It is
1212 caller's responsibility to deallocate basic block list.
1213 If-conversion suitable order is, breadth first sort (BFS) order
1214 with an additional constraint: select a block only if all its
1215 predecessors are already selected. */
1217 static basic_block *
1218 get_loop_body_in_if_conv_order (const class loop *loop)
1220 basic_block *blocks, *blocks_in_bfs_order;
1221 basic_block bb;
1222 bitmap visited;
1223 unsigned int index = 0;
1224 unsigned int visited_count = 0;
1226 gcc_assert (loop->num_nodes);
1227 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1229 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1230 visited = BITMAP_ALLOC (NULL);
1232 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1234 index = 0;
1235 while (index < loop->num_nodes)
1237 bb = blocks_in_bfs_order [index];
1239 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1241 free (blocks_in_bfs_order);
1242 BITMAP_FREE (visited);
1243 free (blocks);
1244 return NULL;
1247 if (!bitmap_bit_p (visited, bb->index))
1249 if (pred_blocks_visited_p (bb, &visited)
1250 || bb == loop->header)
1252 /* This block is now visited. */
1253 bitmap_set_bit (visited, bb->index);
1254 blocks[visited_count++] = bb;
1258 index++;
1260 if (index == loop->num_nodes
1261 && visited_count != loop->num_nodes)
1262 /* Not done yet. */
1263 index = 0;
1265 free (blocks_in_bfs_order);
1266 BITMAP_FREE (visited);
1267 return blocks;
1270 /* Returns true when the analysis of the predicates for all the basic
1271 blocks in LOOP succeeded.
1273 predicate_bbs first allocates the predicates of the basic blocks.
1274 These fields are then initialized with the tree expressions
1275 representing the predicates under which a basic block is executed
1276 in the LOOP. As the loop->header is executed at each iteration, it
1277 has the "true" predicate. Other statements executed under a
1278 condition are predicated with that condition, for example
1280 | if (x)
1281 | S1;
1282 | else
1283 | S2;
1285 S1 will be predicated with "x", and
1286 S2 will be predicated with "!x". */
1288 static void
1289 predicate_bbs (loop_p loop)
1291 unsigned int i;
1293 for (i = 0; i < loop->num_nodes; i++)
1294 init_bb_predicate (ifc_bbs[i]);
1296 for (i = 0; i < loop->num_nodes; i++)
1298 basic_block bb = ifc_bbs[i];
1299 tree cond;
1300 gimple *stmt;
1302 /* The loop latch and loop exit block are always executed and
1303 have no extra conditions to be processed: skip them. */
1304 if (bb == loop->latch
1305 || bb_with_exit_edge_p (loop, bb))
1307 reset_bb_predicate (bb);
1308 continue;
1311 cond = bb_predicate (bb);
1312 stmt = last_stmt (bb);
1313 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1315 tree c2;
1316 edge true_edge, false_edge;
1317 location_t loc = gimple_location (stmt);
1318 tree c = build2_loc (loc, gimple_cond_code (stmt),
1319 boolean_type_node,
1320 gimple_cond_lhs (stmt),
1321 gimple_cond_rhs (stmt));
1323 /* Add new condition into destination's predicate list. */
1324 extract_true_false_edges_from_block (gimple_bb (stmt),
1325 &true_edge, &false_edge);
1327 /* If C is true, then TRUE_EDGE is taken. */
1328 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1329 unshare_expr (c));
1331 /* If C is false, then FALSE_EDGE is taken. */
1332 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1333 unshare_expr (c));
1334 add_to_dst_predicate_list (loop, false_edge,
1335 unshare_expr (cond), c2);
1337 cond = NULL_TREE;
1340 /* If current bb has only one successor, then consider it as an
1341 unconditional goto. */
1342 if (single_succ_p (bb))
1344 basic_block bb_n = single_succ (bb);
1346 /* The successor bb inherits the predicate of its
1347 predecessor. If there is no predicate in the predecessor
1348 bb, then consider the successor bb as always executed. */
1349 if (cond == NULL_TREE)
1350 cond = boolean_true_node;
1352 add_to_predicate_list (loop, bb_n, cond);
1356 /* The loop header is always executed. */
1357 reset_bb_predicate (loop->header);
1358 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1359 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1362 /* Build region by adding loop pre-header and post-header blocks. */
1364 static vec<basic_block>
1365 build_region (class loop *loop)
1367 vec<basic_block> region = vNULL;
1368 basic_block exit_bb = NULL;
1370 gcc_assert (ifc_bbs);
1371 /* The first element is loop pre-header. */
1372 region.safe_push (loop_preheader_edge (loop)->src);
1374 for (unsigned int i = 0; i < loop->num_nodes; i++)
1376 basic_block bb = ifc_bbs[i];
1377 region.safe_push (bb);
1378 /* Find loop postheader. */
1379 edge e;
1380 edge_iterator ei;
1381 FOR_EACH_EDGE (e, ei, bb->succs)
1382 if (loop_exit_edge_p (loop, e))
1384 exit_bb = e->dest;
1385 break;
1388 /* The last element is loop post-header. */
1389 gcc_assert (exit_bb);
1390 region.safe_push (exit_bb);
1391 return region;
1394 /* Return true when LOOP is if-convertible. This is a helper function
1395 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1396 in if_convertible_loop_p. */
1398 static bool
1399 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1401 unsigned int i;
1402 basic_block exit_bb = NULL;
1403 vec<basic_block> region;
1405 if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1406 return false;
1408 calculate_dominance_info (CDI_DOMINATORS);
1410 /* Allow statements that can be handled during if-conversion. */
1411 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1412 if (!ifc_bbs)
1414 if (dump_file && (dump_flags & TDF_DETAILS))
1415 fprintf (dump_file, "Irreducible loop\n");
1416 return false;
1419 for (i = 0; i < loop->num_nodes; i++)
1421 basic_block bb = ifc_bbs[i];
1423 if (!if_convertible_bb_p (loop, bb, exit_bb))
1424 return false;
1426 if (bb_with_exit_edge_p (loop, bb))
1427 exit_bb = bb;
1430 for (i = 0; i < loop->num_nodes; i++)
1432 basic_block bb = ifc_bbs[i];
1433 gimple_stmt_iterator gsi;
1435 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1436 switch (gimple_code (gsi_stmt (gsi)))
1438 case GIMPLE_LABEL:
1439 case GIMPLE_ASSIGN:
1440 case GIMPLE_CALL:
1441 case GIMPLE_DEBUG:
1442 case GIMPLE_COND:
1443 gimple_set_uid (gsi_stmt (gsi), 0);
1444 break;
1445 default:
1446 return false;
1450 data_reference_p dr;
1452 innermost_DR_map
1453 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1454 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1456 /* Compute post-dominator tree locally. */
1457 region = build_region (loop);
1458 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1460 predicate_bbs (loop);
1462 /* Free post-dominator tree since it is not used after predication. */
1463 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1464 region.release ();
1466 for (i = 0; refs->iterate (i, &dr); i++)
1468 tree ref = DR_REF (dr);
1470 dr->aux = XNEW (struct ifc_dr);
1471 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1472 DR_RW_UNCONDITIONALLY (dr) = false;
1473 DR_W_UNCONDITIONALLY (dr) = false;
1474 IFC_DR (dr)->rw_predicate = boolean_false_node;
1475 IFC_DR (dr)->w_predicate = boolean_false_node;
1476 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1477 if (gimple_uid (DR_STMT (dr)) == 0)
1478 gimple_set_uid (DR_STMT (dr), i + 1);
1480 /* If DR doesn't have innermost loop behavior or it's a compound
1481 memory reference, we synthesize its innermost loop behavior
1482 for hashing. */
1483 if (TREE_CODE (ref) == COMPONENT_REF
1484 || TREE_CODE (ref) == IMAGPART_EXPR
1485 || TREE_CODE (ref) == REALPART_EXPR
1486 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1487 || DR_INIT (dr) || DR_STEP (dr)))
1489 while (TREE_CODE (ref) == COMPONENT_REF
1490 || TREE_CODE (ref) == IMAGPART_EXPR
1491 || TREE_CODE (ref) == REALPART_EXPR)
1492 ref = TREE_OPERAND (ref, 0);
1494 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1495 DR_BASE_ADDRESS (dr) = ref;
1497 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1500 for (i = 0; i < loop->num_nodes; i++)
1502 basic_block bb = ifc_bbs[i];
1503 gimple_stmt_iterator itr;
1505 /* Check the if-convertibility of statements in predicated BBs. */
1506 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1507 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1508 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1509 return false;
1512 /* Checking PHIs needs to be done after stmts, as the fact whether there
1513 are any masked loads or stores affects the tests. */
1514 for (i = 0; i < loop->num_nodes; i++)
1516 basic_block bb = ifc_bbs[i];
1517 gphi_iterator itr;
1519 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1520 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1521 return false;
1524 if (dump_file)
1525 fprintf (dump_file, "Applying if-conversion\n");
1527 return true;
1530 /* Return true when LOOP is if-convertible.
1531 LOOP is if-convertible if:
1532 - it is innermost,
1533 - it has two or more basic blocks,
1534 - it has only one exit,
1535 - loop header is not the exit edge,
1536 - if its basic blocks and phi nodes are if convertible. */
1538 static bool
1539 if_convertible_loop_p (class loop *loop)
1541 edge e;
1542 edge_iterator ei;
1543 bool res = false;
1544 vec<data_reference_p> refs;
1546 /* Handle only innermost loop. */
1547 if (!loop || loop->inner)
1549 if (dump_file && (dump_flags & TDF_DETAILS))
1550 fprintf (dump_file, "not innermost loop\n");
1551 return false;
1554 /* If only one block, no need for if-conversion. */
1555 if (loop->num_nodes <= 2)
1557 if (dump_file && (dump_flags & TDF_DETAILS))
1558 fprintf (dump_file, "less than 2 basic blocks\n");
1559 return false;
1562 /* More than one loop exit is too much to handle. */
1563 if (!single_exit (loop))
1565 if (dump_file && (dump_flags & TDF_DETAILS))
1566 fprintf (dump_file, "multiple exits\n");
1567 return false;
1570 /* If one of the loop header's edge is an exit edge then do not
1571 apply if-conversion. */
1572 FOR_EACH_EDGE (e, ei, loop->header->succs)
1573 if (loop_exit_edge_p (loop, e))
1574 return false;
1576 refs.create (5);
1577 res = if_convertible_loop_p_1 (loop, &refs);
1579 data_reference_p dr;
1580 unsigned int i;
1581 for (i = 0; refs.iterate (i, &dr); i++)
1582 free (dr->aux);
1584 free_data_refs (refs);
1586 delete innermost_DR_map;
1587 innermost_DR_map = NULL;
1589 delete baseref_DR_map;
1590 baseref_DR_map = NULL;
1592 return res;
1595 /* Return reduc_1 if has_nop.
1597 if (...)
1598 tmp1 = (unsigned type) reduc_1;
1599 tmp2 = tmp1 + rhs2;
1600 reduc_3 = (signed type) tmp2. */
1601 static tree
1602 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1604 if (!has_nop)
1605 return op;
1607 if (TREE_CODE (op) != SSA_NAME)
1608 return NULL_TREE;
1610 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1611 if (!stmt
1612 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1613 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1614 (gimple_assign_rhs1 (stmt))))
1615 return NULL_TREE;
1617 return gimple_assign_rhs1 (stmt);
1620 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1621 which is in predicated basic block.
1622 In fact, the following PHI pattern is searching:
1623 loop-header:
1624 reduc_1 = PHI <..., reduc_2>
1626 if (...)
1627 reduc_3 = ...
1628 reduc_2 = PHI <reduc_1, reduc_3>
1630 ARG_0 and ARG_1 are correspondent PHI arguments.
1631 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1632 EXTENDED is true if PHI has > 2 arguments. */
1634 static bool
1635 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1636 tree *op0, tree *op1, bool extended, bool* has_nop,
1637 gimple **nop_reduc)
1639 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1640 gimple *stmt;
1641 gimple *header_phi = NULL;
1642 enum tree_code reduction_op;
1643 basic_block bb = gimple_bb (phi);
1644 class loop *loop = bb->loop_father;
1645 edge latch_e = loop_latch_edge (loop);
1646 imm_use_iterator imm_iter;
1647 use_operand_p use_p;
1648 edge e;
1649 edge_iterator ei;
1650 bool result = *has_nop = false;
1651 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1652 return false;
1654 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1656 lhs = arg_1;
1657 header_phi = SSA_NAME_DEF_STMT (arg_0);
1658 stmt = SSA_NAME_DEF_STMT (arg_1);
1660 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1662 lhs = arg_0;
1663 header_phi = SSA_NAME_DEF_STMT (arg_1);
1664 stmt = SSA_NAME_DEF_STMT (arg_0);
1666 else
1667 return false;
1668 if (gimple_bb (header_phi) != loop->header)
1669 return false;
1671 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1672 return false;
1674 if (gimple_code (stmt) != GIMPLE_ASSIGN
1675 || gimple_has_volatile_ops (stmt))
1676 return false;
1678 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1679 return false;
1681 if (!is_predicated (gimple_bb (stmt)))
1682 return false;
1684 /* Check that stmt-block is predecessor of phi-block. */
1685 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1686 if (e->dest == bb)
1688 result = true;
1689 break;
1691 if (!result)
1692 return false;
1694 if (!has_single_use (lhs))
1695 return false;
1697 reduction_op = gimple_assign_rhs_code (stmt);
1699 /* Catch something like below
1701 loop-header:
1702 reduc_1 = PHI <..., reduc_2>
1704 if (...)
1705 tmp1 = (unsigned type) reduc_1;
1706 tmp2 = tmp1 + rhs2;
1707 reduc_3 = (signed type) tmp2;
1709 reduc_2 = PHI <reduc_1, reduc_3>
1711 and convert to
1713 reduc_2 = PHI <0, reduc_3>
1714 tmp1 = (unsigned type)reduce_1;
1715 ifcvt = cond_expr ? rhs2 : 0
1716 tmp2 = tmp1 +/- ifcvt;
1717 reduce_1 = (signed type)tmp2; */
1719 if (CONVERT_EXPR_CODE_P (reduction_op))
1721 lhs = gimple_assign_rhs1 (stmt);
1722 if (TREE_CODE (lhs) != SSA_NAME
1723 || !has_single_use (lhs))
1724 return false;
1726 *nop_reduc = stmt;
1727 stmt = SSA_NAME_DEF_STMT (lhs);
1728 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1729 || !is_gimple_assign (stmt))
1730 return false;
1732 *has_nop = true;
1733 reduction_op = gimple_assign_rhs_code (stmt);
1736 if (reduction_op != PLUS_EXPR
1737 && reduction_op != MINUS_EXPR
1738 && reduction_op != BIT_IOR_EXPR
1739 && reduction_op != BIT_XOR_EXPR
1740 && reduction_op != BIT_AND_EXPR)
1741 return false;
1742 r_op1 = gimple_assign_rhs1 (stmt);
1743 r_op2 = gimple_assign_rhs2 (stmt);
1745 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1746 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1748 /* Make R_OP1 to hold reduction variable. */
1749 if (r_nop2 == PHI_RESULT (header_phi)
1750 && commutative_tree_code (reduction_op))
1752 std::swap (r_op1, r_op2);
1753 std::swap (r_nop1, r_nop2);
1755 else if (r_nop1 != PHI_RESULT (header_phi))
1756 return false;
1758 if (*has_nop)
1760 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1761 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1763 gimple *use_stmt = USE_STMT (use_p);
1764 if (is_gimple_debug (use_stmt))
1765 continue;
1766 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1767 continue;
1768 if (use_stmt != phi)
1769 return false;
1773 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1774 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1776 gimple *use_stmt = USE_STMT (use_p);
1777 if (is_gimple_debug (use_stmt))
1778 continue;
1779 if (use_stmt == stmt)
1780 continue;
1781 if (gimple_code (use_stmt) != GIMPLE_PHI)
1782 return false;
1785 *op0 = r_op1; *op1 = r_op2;
1786 *reduc = stmt;
1787 return true;
1790 /* Converts conditional scalar reduction into unconditional form, e.g.
1791 bb_4
1792 if (_5 != 0) goto bb_5 else goto bb_6
1793 end_bb_4
1794 bb_5
1795 res_6 = res_13 + 1;
1796 end_bb_5
1797 bb_6
1798 # res_2 = PHI <res_13(4), res_6(5)>
1799 end_bb_6
1801 will be converted into sequence
1802 _ifc__1 = _5 != 0 ? 1 : 0;
1803 res_2 = res_13 + _ifc__1;
1804 Argument SWAP tells that arguments of conditional expression should be
1805 swapped.
1806 Returns rhs of resulting PHI assignment. */
1808 static tree
1809 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1810 tree cond, tree op0, tree op1, bool swap,
1811 bool has_nop, gimple* nop_reduc)
1813 gimple_stmt_iterator stmt_it;
1814 gimple *new_assign;
1815 tree rhs;
1816 tree rhs1 = gimple_assign_rhs1 (reduc);
1817 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1818 tree c;
1819 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
1820 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1821 gimple_seq stmts = NULL;
1823 if (dump_file && (dump_flags & TDF_DETAILS))
1825 fprintf (dump_file, "Found cond scalar reduction.\n");
1826 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1829 /* Build cond expression using COND and constant operand
1830 of reduction rhs. */
1831 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1832 unshare_expr (cond),
1833 swap ? op_nochange : op1,
1834 swap ? op1 : op_nochange);
1836 /* Create assignment stmt and insert it at GSI. */
1837 new_assign = gimple_build_assign (tmp, c);
1838 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1839 /* Build rhs for unconditional increment/decrement/logic_operation. */
1840 rhs = gimple_build (&stmts, reduction_op,
1841 TREE_TYPE (rhs1), op0, tmp);
1843 if (has_nop)
1845 rhs = gimple_convert (&stmts,
1846 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1847 stmt_it = gsi_for_stmt (nop_reduc);
1848 gsi_remove (&stmt_it, true);
1849 release_defs (nop_reduc);
1851 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1853 /* Delete original reduction stmt. */
1854 stmt_it = gsi_for_stmt (reduc);
1855 gsi_remove (&stmt_it, true);
1856 release_defs (reduc);
1857 return rhs;
1860 /* Produce condition for all occurrences of ARG in PHI node. */
1862 static tree
1863 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1864 gimple_stmt_iterator *gsi)
1866 int len;
1867 int i;
1868 tree cond = NULL_TREE;
1869 tree c;
1870 edge e;
1872 len = occur->length ();
1873 gcc_assert (len > 0);
1874 for (i = 0; i < len; i++)
1876 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1877 c = bb_predicate (e->src);
1878 if (is_true_predicate (c))
1880 cond = c;
1881 break;
1883 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1884 is_gimple_condexpr, NULL_TREE,
1885 true, GSI_SAME_STMT);
1886 if (cond != NULL_TREE)
1888 /* Must build OR expression. */
1889 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1890 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1891 is_gimple_condexpr, NULL_TREE,
1892 true, GSI_SAME_STMT);
1894 else
1895 cond = c;
1897 gcc_assert (cond != NULL_TREE);
1898 return cond;
1901 /* Local valueization callback that follows all-use SSA edges. */
1903 static tree
1904 ifcvt_follow_ssa_use_edges (tree val)
1906 return val;
1909 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1910 This routine can handle PHI nodes with more than two arguments.
1912 For example,
1913 S1: A = PHI <x1(1), x2(5)>
1914 is converted into,
1915 S2: A = cond ? x1 : x2;
1917 The generated code is inserted at GSI that points to the top of
1918 basic block's statement list.
1919 If PHI node has more than two arguments a chain of conditional
1920 expression is produced. */
1923 static void
1924 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1926 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1927 tree rhs, res, arg0, arg1, op0, op1, scev;
1928 tree cond;
1929 unsigned int index0;
1930 unsigned int max, args_len;
1931 edge e;
1932 basic_block bb;
1933 unsigned int i;
1934 bool has_nop;
1936 res = gimple_phi_result (phi);
1937 if (virtual_operand_p (res))
1938 return;
1940 if ((rhs = degenerate_phi_result (phi))
1941 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1942 res))
1943 && !chrec_contains_undetermined (scev)
1944 && scev != res
1945 && (rhs = gimple_phi_arg_def (phi, 0))))
1947 if (dump_file && (dump_flags & TDF_DETAILS))
1949 fprintf (dump_file, "Degenerate phi!\n");
1950 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1952 new_stmt = gimple_build_assign (res, rhs);
1953 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1954 update_stmt (new_stmt);
1955 return;
1958 bb = gimple_bb (phi);
1959 if (EDGE_COUNT (bb->preds) == 2)
1961 /* Predicate ordinary PHI node with 2 arguments. */
1962 edge first_edge, second_edge;
1963 basic_block true_bb;
1964 first_edge = EDGE_PRED (bb, 0);
1965 second_edge = EDGE_PRED (bb, 1);
1966 cond = bb_predicate (first_edge->src);
1967 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1968 std::swap (first_edge, second_edge);
1969 if (EDGE_COUNT (first_edge->src->succs) > 1)
1971 cond = bb_predicate (second_edge->src);
1972 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1973 cond = TREE_OPERAND (cond, 0);
1974 else
1975 first_edge = second_edge;
1977 else
1978 cond = bb_predicate (first_edge->src);
1979 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1980 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1981 is_gimple_condexpr, NULL_TREE,
1982 true, GSI_SAME_STMT);
1983 true_bb = first_edge->src;
1984 if (EDGE_PRED (bb, 1)->src == true_bb)
1986 arg0 = gimple_phi_arg_def (phi, 1);
1987 arg1 = gimple_phi_arg_def (phi, 0);
1989 else
1991 arg0 = gimple_phi_arg_def (phi, 0);
1992 arg1 = gimple_phi_arg_def (phi, 1);
1994 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1995 &op0, &op1, false, &has_nop,
1996 &nop_reduc))
1998 /* Convert reduction stmt into vectorizable form. */
1999 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2000 true_bb != gimple_bb (reduc),
2001 has_nop, nop_reduc);
2002 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2004 else
2005 /* Build new RHS using selected condition and arguments. */
2006 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2007 arg0, arg1);
2008 new_stmt = gimple_build_assign (res, rhs);
2009 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2010 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2011 if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
2013 new_stmt = gsi_stmt (new_gsi);
2014 update_stmt (new_stmt);
2017 if (dump_file && (dump_flags & TDF_DETAILS))
2019 fprintf (dump_file, "new phi replacement stmt\n");
2020 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2022 return;
2025 /* Create hashmap for PHI node which contain vector of argument indexes
2026 having the same value. */
2027 bool swap = false;
2028 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2029 unsigned int num_args = gimple_phi_num_args (phi);
2030 int max_ind = -1;
2031 /* Vector of different PHI argument values. */
2032 auto_vec<tree> args (num_args);
2034 /* Compute phi_arg_map. */
2035 for (i = 0; i < num_args; i++)
2037 tree arg;
2039 arg = gimple_phi_arg_def (phi, i);
2040 if (!phi_arg_map.get (arg))
2041 args.quick_push (arg);
2042 phi_arg_map.get_or_insert (arg).safe_push (i);
2045 /* Determine element with max number of occurrences. */
2046 max_ind = -1;
2047 max = 1;
2048 args_len = args.length ();
2049 for (i = 0; i < args_len; i++)
2051 unsigned int len;
2052 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2054 max_ind = (int) i;
2055 max = len;
2059 /* Put element with max number of occurences to the end of ARGS. */
2060 if (max_ind != -1 && max_ind +1 != (int) args_len)
2061 std::swap (args[args_len - 1], args[max_ind]);
2063 /* Handle one special case when number of arguments with different values
2064 is equal 2 and one argument has the only occurrence. Such PHI can be
2065 handled as if would have only 2 arguments. */
2066 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2068 vec<int> *indexes;
2069 indexes = phi_arg_map.get (args[0]);
2070 index0 = (*indexes)[0];
2071 arg0 = args[0];
2072 arg1 = args[1];
2073 e = gimple_phi_arg_edge (phi, index0);
2074 cond = bb_predicate (e->src);
2075 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2077 swap = true;
2078 cond = TREE_OPERAND (cond, 0);
2080 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2081 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
2082 is_gimple_condexpr, NULL_TREE,
2083 true, GSI_SAME_STMT);
2084 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2085 &op0, &op1, true, &has_nop, &nop_reduc)))
2086 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2087 swap? arg1 : arg0,
2088 swap? arg0 : arg1);
2089 else
2091 /* Convert reduction stmt into vectorizable form. */
2092 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2093 swap,has_nop, nop_reduc);
2094 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2096 new_stmt = gimple_build_assign (res, rhs);
2097 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2098 update_stmt (new_stmt);
2100 else
2102 /* Common case. */
2103 vec<int> *indexes;
2104 tree type = TREE_TYPE (gimple_phi_result (phi));
2105 tree lhs;
2106 arg1 = args[1];
2107 for (i = 0; i < args_len; i++)
2109 arg0 = args[i];
2110 indexes = phi_arg_map.get (args[i]);
2111 if (i != args_len - 1)
2112 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2113 else
2114 lhs = res;
2115 cond = gen_phi_arg_condition (phi, indexes, gsi);
2116 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2117 arg0, arg1);
2118 new_stmt = gimple_build_assign (lhs, rhs);
2119 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2120 update_stmt (new_stmt);
2121 arg1 = lhs;
2125 if (dump_file && (dump_flags & TDF_DETAILS))
2127 fprintf (dump_file, "new extended phi replacement stmt\n");
2128 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2132 /* Replaces in LOOP all the scalar phi nodes other than those in the
2133 LOOP->header block with conditional modify expressions. */
2135 static void
2136 predicate_all_scalar_phis (class loop *loop)
2138 basic_block bb;
2139 unsigned int orig_loop_num_nodes = loop->num_nodes;
2140 unsigned int i;
2142 for (i = 1; i < orig_loop_num_nodes; i++)
2144 gphi *phi;
2145 gimple_stmt_iterator gsi;
2146 gphi_iterator phi_gsi;
2147 bb = ifc_bbs[i];
2149 if (bb == loop->header)
2150 continue;
2152 phi_gsi = gsi_start_phis (bb);
2153 if (gsi_end_p (phi_gsi))
2154 continue;
2156 gsi = gsi_after_labels (bb);
2157 while (!gsi_end_p (phi_gsi))
2159 phi = phi_gsi.phi ();
2160 if (virtual_operand_p (gimple_phi_result (phi)))
2161 gsi_next (&phi_gsi);
2162 else
2164 predicate_scalar_phi (phi, &gsi);
2165 remove_phi_node (&phi_gsi, false);
2171 /* Insert in each basic block of LOOP the statements produced by the
2172 gimplification of the predicates. */
2174 static void
2175 insert_gimplified_predicates (loop_p loop)
2177 unsigned int i;
2179 for (i = 0; i < loop->num_nodes; i++)
2181 basic_block bb = ifc_bbs[i];
2182 gimple_seq stmts;
2183 if (!is_predicated (bb))
2184 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2185 if (!is_predicated (bb))
2187 /* Do not insert statements for a basic block that is not
2188 predicated. Also make sure that the predicate of the
2189 basic block is set to true. */
2190 reset_bb_predicate (bb);
2191 continue;
2194 stmts = bb_predicate_gimplified_stmts (bb);
2195 if (stmts)
2197 if (need_to_predicate)
2199 /* Insert the predicate of the BB just after the label,
2200 as the if-conversion of memory writes will use this
2201 predicate. */
2202 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2203 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2205 else
2207 /* Insert the predicate of the BB at the end of the BB
2208 as this would reduce the register pressure: the only
2209 use of this predicate will be in successor BBs. */
2210 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2212 if (gsi_end_p (gsi)
2213 || stmt_ends_bb_p (gsi_stmt (gsi)))
2214 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2215 else
2216 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2219 /* Once the sequence is code generated, set it to NULL. */
2220 set_bb_predicate_gimplified_stmts (bb, NULL);
2225 /* Helper function for predicate_statements. Returns index of existent
2226 mask if it was created for given SIZE and -1 otherwise. */
2228 static int
2229 mask_exists (int size, const vec<int> &vec)
2231 unsigned int ix;
2232 int v;
2233 FOR_EACH_VEC_ELT (vec, ix, v)
2234 if (v == size)
2235 return (int) ix;
2236 return -1;
2239 /* Helper function for predicate_statements. STMT is a memory read or
2240 write and it needs to be predicated by MASK. Return a statement
2241 that does so. */
2243 static gimple *
2244 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2246 gcall *new_stmt;
2248 tree lhs = gimple_assign_lhs (stmt);
2249 tree rhs = gimple_assign_rhs1 (stmt);
2250 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2251 mark_addressable (ref);
2252 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2253 true, NULL_TREE, true, GSI_SAME_STMT);
2254 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2255 get_object_alignment (ref));
2256 /* Copy points-to info if possible. */
2257 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2258 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2259 ref);
2260 if (TREE_CODE (lhs) == SSA_NAME)
2262 new_stmt
2263 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2264 ptr, mask);
2265 gimple_call_set_lhs (new_stmt, lhs);
2266 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2268 else
2270 new_stmt
2271 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2272 mask, rhs);
2273 gimple_move_vops (new_stmt, stmt);
2275 gimple_call_set_nothrow (new_stmt, true);
2276 return new_stmt;
2279 /* STMT uses OP_LHS. Check whether it is equivalent to:
2281 ... = OP_MASK ? OP_LHS : X;
2283 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2284 known to have value OP_COND. */
2286 static tree
2287 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2288 tree op_lhs)
2290 gassign *assign = dyn_cast <gassign *> (stmt);
2291 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2292 return NULL_TREE;
2294 tree use_cond = gimple_assign_rhs1 (assign);
2295 tree if_true = gimple_assign_rhs2 (assign);
2296 tree if_false = gimple_assign_rhs3 (assign);
2298 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2299 && if_true == op_lhs)
2300 return if_false;
2302 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2303 return if_true;
2305 return NULL_TREE;
2308 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2309 the set of SSA names defined earlier in STMT's block. */
2311 static bool
2312 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2313 tree value)
2315 if (is_gimple_min_invariant (value))
2316 return true;
2318 if (TREE_CODE (value) == SSA_NAME)
2320 if (SSA_NAME_IS_DEFAULT_DEF (value))
2321 return true;
2323 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2324 basic_block use_bb = gimple_bb (stmt);
2325 return (def_bb == use_bb
2326 ? ssa_names->contains (value)
2327 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2330 return false;
2333 /* Helper function for predicate_statements. STMT is a potentially-trapping
2334 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2335 has value COND. Return a statement that does so. SSA_NAMES is the set of
2336 SSA names defined earlier in STMT's block. */
2338 static gimple *
2339 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2340 hash_set<tree_ssa_name_hash> *ssa_names)
2342 tree lhs = gimple_assign_lhs (stmt);
2343 tree_code code = gimple_assign_rhs_code (stmt);
2344 unsigned int nops = gimple_num_ops (stmt);
2345 internal_fn cond_fn = get_conditional_internal_fn (code);
2347 /* Construct the arguments to the conditional internal function. */
2348 auto_vec<tree, 8> args;
2349 args.safe_grow (nops + 1, true);
2350 args[0] = mask;
2351 for (unsigned int i = 1; i < nops; ++i)
2352 args[i] = gimple_op (stmt, i);
2353 args[nops] = NULL_TREE;
2355 /* Look for uses of the result to see whether they are COND_EXPRs that can
2356 be folded into the conditional call. */
2357 imm_use_iterator imm_iter;
2358 gimple *use_stmt;
2359 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2361 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2362 if (new_else && value_available_p (stmt, ssa_names, new_else))
2364 if (!args[nops])
2365 args[nops] = new_else;
2366 if (operand_equal_p (new_else, args[nops], 0))
2368 /* We have:
2370 LHS = IFN_COND (MASK, ..., ELSE);
2371 X = MASK ? LHS : ELSE;
2373 which makes X equivalent to LHS. */
2374 tree use_lhs = gimple_assign_lhs (use_stmt);
2375 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2379 if (!args[nops])
2380 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2381 nops - 1, &args[1]);
2383 /* Create and insert the call. */
2384 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2385 gimple_call_set_lhs (new_stmt, lhs);
2386 gimple_call_set_nothrow (new_stmt, true);
2388 return new_stmt;
2391 /* Predicate each write to memory in LOOP.
2393 This function transforms control flow constructs containing memory
2394 writes of the form:
2396 | for (i = 0; i < N; i++)
2397 | if (cond)
2398 | A[i] = expr;
2400 into the following form that does not contain control flow:
2402 | for (i = 0; i < N; i++)
2403 | A[i] = cond ? expr : A[i];
2405 The original CFG looks like this:
2407 | bb_0
2408 | i = 0
2409 | end_bb_0
2411 | bb_1
2412 | if (i < N) goto bb_5 else goto bb_2
2413 | end_bb_1
2415 | bb_2
2416 | cond = some_computation;
2417 | if (cond) goto bb_3 else goto bb_4
2418 | end_bb_2
2420 | bb_3
2421 | A[i] = expr;
2422 | goto bb_4
2423 | end_bb_3
2425 | bb_4
2426 | goto bb_1
2427 | end_bb_4
2429 insert_gimplified_predicates inserts the computation of the COND
2430 expression at the beginning of the destination basic block:
2432 | bb_0
2433 | i = 0
2434 | end_bb_0
2436 | bb_1
2437 | if (i < N) goto bb_5 else goto bb_2
2438 | end_bb_1
2440 | bb_2
2441 | cond = some_computation;
2442 | if (cond) goto bb_3 else goto bb_4
2443 | end_bb_2
2445 | bb_3
2446 | cond = some_computation;
2447 | A[i] = expr;
2448 | goto bb_4
2449 | end_bb_3
2451 | bb_4
2452 | goto bb_1
2453 | end_bb_4
2455 predicate_statements is then predicating the memory write as follows:
2457 | bb_0
2458 | i = 0
2459 | end_bb_0
2461 | bb_1
2462 | if (i < N) goto bb_5 else goto bb_2
2463 | end_bb_1
2465 | bb_2
2466 | if (cond) goto bb_3 else goto bb_4
2467 | end_bb_2
2469 | bb_3
2470 | cond = some_computation;
2471 | A[i] = cond ? expr : A[i];
2472 | goto bb_4
2473 | end_bb_3
2475 | bb_4
2476 | goto bb_1
2477 | end_bb_4
2479 and finally combine_blocks removes the basic block boundaries making
2480 the loop vectorizable:
2482 | bb_0
2483 | i = 0
2484 | if (i < N) goto bb_5 else goto bb_1
2485 | end_bb_0
2487 | bb_1
2488 | cond = some_computation;
2489 | A[i] = cond ? expr : A[i];
2490 | if (i < N) goto bb_5 else goto bb_4
2491 | end_bb_1
2493 | bb_4
2494 | goto bb_1
2495 | end_bb_4
2498 static void
2499 predicate_statements (loop_p loop, edge pe)
2501 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2502 auto_vec<int, 1> vect_sizes;
2503 auto_vec<tree, 1> vect_masks;
2504 hash_set<tree_ssa_name_hash> ssa_names;
2506 for (i = 1; i < orig_loop_num_nodes; i++)
2508 gimple_stmt_iterator gsi;
2509 basic_block bb = ifc_bbs[i];
2510 tree cond = bb_predicate (bb);
2511 bool swap;
2512 int index;
2514 if (is_true_predicate (cond))
2515 continue;
2517 swap = false;
2518 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2520 swap = true;
2521 cond = TREE_OPERAND (cond, 0);
2524 vect_sizes.truncate (0);
2525 vect_masks.truncate (0);
2527 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2529 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2530 tree lhs;
2531 if (!stmt)
2533 else if (is_false_predicate (cond)
2534 && gimple_vdef (stmt))
2536 unlink_stmt_vdef (stmt);
2537 gsi_remove (&gsi, true);
2538 release_defs (stmt);
2539 continue;
2541 else if (gimple_plf (stmt, GF_PLF_2))
2543 tree lhs = gimple_assign_lhs (stmt);
2544 tree mask;
2545 gimple *new_stmt;
2546 gimple_seq stmts = NULL;
2547 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2548 /* We checked before setting GF_PLF_2 that an equivalent
2549 integer mode exists. */
2550 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2551 if (!vect_sizes.is_empty ()
2552 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2553 /* Use created mask. */
2554 mask = vect_masks[index];
2555 else
2557 if (COMPARISON_CLASS_P (cond))
2558 mask = gimple_build (&stmts, TREE_CODE (cond),
2559 boolean_type_node,
2560 TREE_OPERAND (cond, 0),
2561 TREE_OPERAND (cond, 1));
2562 else
2563 mask = cond;
2565 if (swap)
2567 tree true_val
2568 = constant_boolean_node (true, TREE_TYPE (mask));
2569 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2570 TREE_TYPE (mask), mask, true_val);
2572 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2574 /* Save mask and its size for further use. */
2575 vect_sizes.safe_push (bitsize);
2576 vect_masks.safe_push (mask);
2578 if (gimple_assign_single_p (stmt))
2579 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2580 else
2581 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2583 gsi_replace (&gsi, new_stmt, true);
2585 else if (((lhs = gimple_assign_lhs (stmt)), true)
2586 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2587 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2588 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2589 && arith_code_with_undefined_signed_overflow
2590 (gimple_assign_rhs_code (stmt)))
2592 gsi_remove (&gsi, true);
2593 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2594 bool first = true;
2595 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2596 !gsi_end_p (gsi2);)
2598 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2599 gsi_remove (&gsi2, false);
2600 /* Make sure to move invariant conversions out of the
2601 loop. */
2602 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2))
2603 && expr_invariant_in_loop_p (loop,
2604 gimple_assign_rhs1 (stmt2)))
2605 gsi_insert_on_edge_immediate (pe, stmt2);
2606 else if (first)
2608 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2609 first = false;
2611 else
2612 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2615 else if (gimple_vdef (stmt))
2617 tree lhs = gimple_assign_lhs (stmt);
2618 tree rhs = gimple_assign_rhs1 (stmt);
2619 tree type = TREE_TYPE (lhs);
2621 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2622 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2623 if (swap)
2624 std::swap (lhs, rhs);
2625 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2626 is_gimple_condexpr, NULL_TREE,
2627 true, GSI_SAME_STMT);
2628 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2629 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2630 update_stmt (stmt);
2632 lhs = gimple_get_lhs (gsi_stmt (gsi));
2633 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2634 ssa_names.add (lhs);
2635 gsi_next (&gsi);
2637 ssa_names.empty ();
2641 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2642 other than the exit and latch of the LOOP. Also resets the
2643 GIMPLE_DEBUG information. */
2645 static void
2646 remove_conditions_and_labels (loop_p loop)
2648 gimple_stmt_iterator gsi;
2649 unsigned int i;
2651 for (i = 0; i < loop->num_nodes; i++)
2653 basic_block bb = ifc_bbs[i];
2655 if (bb_with_exit_edge_p (loop, bb)
2656 || bb == loop->latch)
2657 continue;
2659 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2660 switch (gimple_code (gsi_stmt (gsi)))
2662 case GIMPLE_COND:
2663 case GIMPLE_LABEL:
2664 gsi_remove (&gsi, true);
2665 break;
2667 case GIMPLE_DEBUG:
2668 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2669 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2671 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2672 update_stmt (gsi_stmt (gsi));
2674 gsi_next (&gsi);
2675 break;
2677 default:
2678 gsi_next (&gsi);
2683 /* Combine all the basic blocks from LOOP into one or two super basic
2684 blocks. Replace PHI nodes with conditional modify expressions. */
2686 static void
2687 combine_blocks (class loop *loop, edge pe)
2689 basic_block bb, exit_bb, merge_target_bb;
2690 unsigned int orig_loop_num_nodes = loop->num_nodes;
2691 unsigned int i;
2692 edge e;
2693 edge_iterator ei;
2695 remove_conditions_and_labels (loop);
2696 insert_gimplified_predicates (loop);
2697 predicate_all_scalar_phis (loop);
2699 if (need_to_predicate || need_to_rewrite_undefined)
2700 predicate_statements (loop, pe);
2702 /* Merge basic blocks. */
2703 exit_bb = NULL;
2704 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2705 for (i = 0; i < orig_loop_num_nodes; i++)
2707 bb = ifc_bbs[i];
2708 predicated[i] = !is_true_predicate (bb_predicate (bb));
2709 free_bb_predicate (bb);
2710 if (bb_with_exit_edge_p (loop, bb))
2712 gcc_assert (exit_bb == NULL);
2713 exit_bb = bb;
2716 gcc_assert (exit_bb != loop->latch);
2718 merge_target_bb = loop->header;
2720 /* Get at the virtual def valid for uses starting at the first block
2721 we merge into the header. Without a virtual PHI the loop has the
2722 same virtual use on all stmts. */
2723 gphi *vphi = get_virtual_phi (loop->header);
2724 tree last_vdef = NULL_TREE;
2725 if (vphi)
2727 last_vdef = gimple_phi_result (vphi);
2728 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2729 ! gsi_end_p (gsi); gsi_next (&gsi))
2730 if (gimple_vdef (gsi_stmt (gsi)))
2731 last_vdef = gimple_vdef (gsi_stmt (gsi));
2733 for (i = 1; i < orig_loop_num_nodes; i++)
2735 gimple_stmt_iterator gsi;
2736 gimple_stmt_iterator last;
2738 bb = ifc_bbs[i];
2740 if (bb == exit_bb || bb == loop->latch)
2741 continue;
2743 /* We release virtual PHIs late because we have to propagate them
2744 out using the current VUSE. The def might be the one used
2745 after the loop. */
2746 vphi = get_virtual_phi (bb);
2747 if (vphi)
2749 /* When there's just loads inside the loop a stray virtual
2750 PHI merging the uses can appear, update last_vdef from
2751 it. */
2752 if (!last_vdef)
2753 last_vdef = gimple_phi_arg_def (vphi, 0);
2754 imm_use_iterator iter;
2755 use_operand_p use_p;
2756 gimple *use_stmt;
2757 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2759 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2760 SET_USE (use_p, last_vdef);
2762 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2763 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2764 gsi = gsi_for_stmt (vphi);
2765 remove_phi_node (&gsi, true);
2768 /* Make stmts member of loop->header and clear range info from all stmts
2769 in BB which is now no longer executed conditional on a predicate we
2770 could have derived it from. */
2771 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2773 gimple *stmt = gsi_stmt (gsi);
2774 gimple_set_bb (stmt, merge_target_bb);
2775 /* Update virtual operands. */
2776 if (last_vdef)
2778 use_operand_p use_p = ssa_vuse_operand (stmt);
2779 if (use_p
2780 && USE_FROM_PTR (use_p) != last_vdef)
2781 SET_USE (use_p, last_vdef);
2782 if (gimple_vdef (stmt))
2783 last_vdef = gimple_vdef (stmt);
2785 else
2786 /* If this is the first load we arrive at update last_vdef
2787 so we handle stray PHIs correctly. */
2788 last_vdef = gimple_vuse (stmt);
2789 if (predicated[i])
2791 ssa_op_iter i;
2792 tree op;
2793 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2794 reset_flow_sensitive_info (op);
2798 /* Update stmt list. */
2799 last = gsi_last_bb (merge_target_bb);
2800 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2801 set_bb_seq (bb, NULL);
2804 /* Fixup virtual operands in the exit block. */
2805 if (exit_bb
2806 && exit_bb != loop->header)
2808 /* We release virtual PHIs late because we have to propagate them
2809 out using the current VUSE. The def might be the one used
2810 after the loop. */
2811 vphi = get_virtual_phi (exit_bb);
2812 if (vphi)
2814 /* When there's just loads inside the loop a stray virtual
2815 PHI merging the uses can appear, update last_vdef from
2816 it. */
2817 if (!last_vdef)
2818 last_vdef = gimple_phi_arg_def (vphi, 0);
2819 imm_use_iterator iter;
2820 use_operand_p use_p;
2821 gimple *use_stmt;
2822 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2824 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2825 SET_USE (use_p, last_vdef);
2827 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2828 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2829 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2830 remove_phi_node (&gsi, true);
2834 /* Now remove all the edges in the loop, except for those from the exit
2835 block and delete the blocks we elided. */
2836 for (i = 1; i < orig_loop_num_nodes; i++)
2838 bb = ifc_bbs[i];
2840 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2842 if (e->src == exit_bb)
2843 ei_next (&ei);
2844 else
2845 remove_edge (e);
2848 for (i = 1; i < orig_loop_num_nodes; i++)
2850 bb = ifc_bbs[i];
2852 if (bb == exit_bb || bb == loop->latch)
2853 continue;
2855 delete_basic_block (bb);
2858 /* Re-connect the exit block. */
2859 if (exit_bb != NULL)
2861 if (exit_bb != loop->header)
2863 /* Connect this node to loop header. */
2864 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2865 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2868 /* Redirect non-exit edges to loop->latch. */
2869 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2871 if (!loop_exit_edge_p (loop, e))
2872 redirect_edge_and_branch (e, loop->latch);
2874 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2876 else
2878 /* If the loop does not have an exit, reconnect header and latch. */
2879 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2880 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2883 /* If possible, merge loop header to the block with the exit edge.
2884 This reduces the number of basic blocks to two, to please the
2885 vectorizer that handles only loops with two nodes. */
2886 if (exit_bb
2887 && exit_bb != loop->header)
2889 if (can_merge_blocks_p (loop->header, exit_bb))
2890 merge_blocks (loop->header, exit_bb);
2893 free (ifc_bbs);
2894 ifc_bbs = NULL;
2895 free (predicated);
2898 /* Version LOOP before if-converting it; the original loop
2899 will be if-converted, the new copy of the loop will not,
2900 and the LOOP_VECTORIZED internal call will be guarding which
2901 loop to execute. The vectorizer pass will fold this
2902 internal call into either true or false.
2904 Note that this function intentionally invalidates profile. Both edges
2905 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2906 consistent after the condition is folded in the vectorizer. */
2908 static class loop *
2909 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2911 basic_block cond_bb;
2912 tree cond = make_ssa_name (boolean_type_node);
2913 class loop *new_loop;
2914 gimple *g;
2915 gimple_stmt_iterator gsi;
2916 unsigned int save_length;
2918 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2919 build_int_cst (integer_type_node, loop->num),
2920 integer_zero_node);
2921 gimple_call_set_lhs (g, cond);
2923 /* Save BB->aux around loop_version as that uses the same field. */
2924 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2925 void **saved_preds = XALLOCAVEC (void *, save_length);
2926 for (unsigned i = 0; i < save_length; i++)
2927 saved_preds[i] = ifc_bbs[i]->aux;
2929 initialize_original_copy_tables ();
2930 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2931 is re-merged in the vectorizer. */
2932 new_loop = loop_version (loop, cond, &cond_bb,
2933 profile_probability::always (),
2934 profile_probability::always (),
2935 profile_probability::always (),
2936 profile_probability::always (), true);
2937 free_original_copy_tables ();
2939 for (unsigned i = 0; i < save_length; i++)
2940 ifc_bbs[i]->aux = saved_preds[i];
2942 if (new_loop == NULL)
2943 return NULL;
2945 new_loop->dont_vectorize = true;
2946 new_loop->force_vectorize = false;
2947 gsi = gsi_last_bb (cond_bb);
2948 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2949 if (preds)
2950 preds->safe_push (g);
2951 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2952 update_ssa (TODO_update_ssa);
2953 return new_loop;
2956 /* Return true when LOOP satisfies the follow conditions that will
2957 allow it to be recognized by the vectorizer for outer-loop
2958 vectorization:
2959 - The loop is not the root node of the loop tree.
2960 - The loop has exactly one inner loop.
2961 - The loop has a single exit.
2962 - The loop header has a single successor, which is the inner
2963 loop header.
2964 - Each of the inner and outer loop latches have a single
2965 predecessor.
2966 - The loop exit block has a single predecessor, which is the
2967 inner loop's exit block. */
2969 static bool
2970 versionable_outer_loop_p (class loop *loop)
2972 if (!loop_outer (loop)
2973 || loop->dont_vectorize
2974 || !loop->inner
2975 || loop->inner->next
2976 || !single_exit (loop)
2977 || !single_succ_p (loop->header)
2978 || single_succ (loop->header) != loop->inner->header
2979 || !single_pred_p (loop->latch)
2980 || !single_pred_p (loop->inner->latch))
2981 return false;
2983 basic_block outer_exit = single_pred (loop->latch);
2984 basic_block inner_exit = single_pred (loop->inner->latch);
2986 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2987 return false;
2989 if (dump_file)
2990 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2992 return true;
2995 /* Performs splitting of critical edges. Skip splitting and return false
2996 if LOOP will not be converted because:
2998 - LOOP is not well formed.
2999 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3001 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3003 static bool
3004 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3006 basic_block *body;
3007 basic_block bb;
3008 unsigned int num = loop->num_nodes;
3009 unsigned int i;
3010 gimple *stmt;
3011 edge e;
3012 edge_iterator ei;
3013 auto_vec<edge> critical_edges;
3015 /* Loop is not well formed. */
3016 if (num <= 2 || loop->inner || !single_exit (loop))
3017 return false;
3019 body = get_loop_body (loop);
3020 for (i = 0; i < num; i++)
3022 bb = body[i];
3023 if (!aggressive_if_conv
3024 && phi_nodes (bb)
3025 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3027 if (dump_file && (dump_flags & TDF_DETAILS))
3028 fprintf (dump_file,
3029 "BB %d has complicated PHI with more than %u args.\n",
3030 bb->index, MAX_PHI_ARG_NUM);
3032 free (body);
3033 return false;
3035 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3036 continue;
3038 stmt = last_stmt (bb);
3039 /* Skip basic blocks not ending with conditional branch. */
3040 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3041 continue;
3043 FOR_EACH_EDGE (e, ei, bb->succs)
3044 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3045 critical_edges.safe_push (e);
3047 free (body);
3049 while (critical_edges.length () > 0)
3051 e = critical_edges.pop ();
3052 /* Don't split if bb can be predicated along non-critical edge. */
3053 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3054 split_edge (e);
3057 return true;
3060 /* Delete redundant statements produced by predication which prevents
3061 loop vectorization. */
3063 static void
3064 ifcvt_local_dce (class loop *loop)
3066 gimple *stmt;
3067 gimple *stmt1;
3068 gimple *phi;
3069 gimple_stmt_iterator gsi;
3070 auto_vec<gimple *> worklist;
3071 enum gimple_code code;
3072 use_operand_p use_p;
3073 imm_use_iterator imm_iter;
3075 /* The loop has a single BB only. */
3076 basic_block bb = loop->header;
3077 tree latch_vdef = NULL_TREE;
3079 worklist.create (64);
3080 /* Consider all phi as live statements. */
3081 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3083 phi = gsi_stmt (gsi);
3084 gimple_set_plf (phi, GF_PLF_2, true);
3085 worklist.safe_push (phi);
3086 if (virtual_operand_p (gimple_phi_result (phi)))
3087 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3089 /* Consider load/store statements, CALL and COND as live. */
3090 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3092 stmt = gsi_stmt (gsi);
3093 if (is_gimple_debug (stmt))
3095 gimple_set_plf (stmt, GF_PLF_2, true);
3096 continue;
3098 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3100 gimple_set_plf (stmt, GF_PLF_2, true);
3101 worklist.safe_push (stmt);
3102 continue;
3104 code = gimple_code (stmt);
3105 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3107 gimple_set_plf (stmt, GF_PLF_2, true);
3108 worklist.safe_push (stmt);
3109 continue;
3111 gimple_set_plf (stmt, GF_PLF_2, false);
3113 if (code == GIMPLE_ASSIGN)
3115 tree lhs = gimple_assign_lhs (stmt);
3116 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3118 stmt1 = USE_STMT (use_p);
3119 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3121 gimple_set_plf (stmt, GF_PLF_2, true);
3122 worklist.safe_push (stmt);
3123 break;
3128 /* Propagate liveness through arguments of live stmt. */
3129 while (worklist.length () > 0)
3131 ssa_op_iter iter;
3132 use_operand_p use_p;
3133 tree use;
3135 stmt = worklist.pop ();
3136 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3138 use = USE_FROM_PTR (use_p);
3139 if (TREE_CODE (use) != SSA_NAME)
3140 continue;
3141 stmt1 = SSA_NAME_DEF_STMT (use);
3142 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3143 continue;
3144 gimple_set_plf (stmt1, GF_PLF_2, true);
3145 worklist.safe_push (stmt1);
3148 /* Delete dead statements. */
3149 gsi = gsi_last_bb (bb);
3150 while (!gsi_end_p (gsi))
3152 gimple_stmt_iterator gsiprev = gsi;
3153 gsi_prev (&gsiprev);
3154 stmt = gsi_stmt (gsi);
3155 if (gimple_store_p (stmt))
3157 tree lhs = gimple_get_lhs (stmt);
3158 ao_ref write;
3159 ao_ref_init (&write, lhs);
3161 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3162 == DSE_STORE_DEAD)
3163 delete_dead_or_redundant_assignment (&gsi, "dead");
3164 gsi = gsiprev;
3165 continue;
3168 if (gimple_plf (stmt, GF_PLF_2))
3170 gsi = gsiprev;
3171 continue;
3173 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3176 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3178 gsi_remove (&gsi, true);
3179 release_defs (stmt);
3180 gsi = gsiprev;
3184 /* If-convert LOOP when it is legal. For the moment this pass has no
3185 profitability analysis. Returns non-zero todo flags when something
3186 changed. */
3188 unsigned int
3189 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3191 unsigned int todo = 0;
3192 bool aggressive_if_conv;
3193 class loop *rloop;
3194 bitmap exit_bbs;
3195 edge pe;
3197 again:
3198 rloop = NULL;
3199 ifc_bbs = NULL;
3200 need_to_predicate = false;
3201 need_to_rewrite_undefined = false;
3202 any_complicated_phi = false;
3204 /* Apply more aggressive if-conversion when loop or its outer loop were
3205 marked with simd pragma. When that's the case, we try to if-convert
3206 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3207 aggressive_if_conv = loop->force_vectorize;
3208 if (!aggressive_if_conv)
3210 class loop *outer_loop = loop_outer (loop);
3211 if (outer_loop && outer_loop->force_vectorize)
3212 aggressive_if_conv = true;
3215 if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
3216 goto cleanup;
3218 if (!if_convertible_loop_p (loop)
3219 || !dbg_cnt (if_conversion_tree))
3220 goto cleanup;
3222 if ((need_to_predicate || any_complicated_phi)
3223 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3224 || loop->dont_vectorize))
3225 goto cleanup;
3227 /* The edge to insert invariant stmts on. */
3228 pe = loop_preheader_edge (loop);
3230 /* Since we have no cost model, always version loops unless the user
3231 specified -ftree-loop-if-convert or unless versioning is required.
3232 Either version this loop, or if the pattern is right for outer-loop
3233 vectorization, version the outer loop. In the latter case we will
3234 still if-convert the original inner loop. */
3235 if (need_to_predicate
3236 || any_complicated_phi
3237 || flag_tree_loop_if_convert != 1)
3239 class loop *vloop
3240 = (versionable_outer_loop_p (loop_outer (loop))
3241 ? loop_outer (loop) : loop);
3242 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3243 if (nloop == NULL)
3244 goto cleanup;
3245 if (vloop != loop)
3247 /* If versionable_outer_loop_p decided to version the
3248 outer loop, version also the inner loop of the non-vectorized
3249 loop copy. So we transform:
3250 loop1
3251 loop2
3252 into:
3253 if (LOOP_VECTORIZED (1, 3))
3255 loop1
3256 loop2
3258 else
3259 loop3 (copy of loop1)
3260 if (LOOP_VECTORIZED (4, 5))
3261 loop4 (copy of loop2)
3262 else
3263 loop5 (copy of loop4) */
3264 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3265 rloop = nloop->inner;
3267 else
3268 /* If we versioned loop then make sure to insert invariant
3269 stmts before the .LOOP_VECTORIZED check since the vectorizer
3270 will re-use that for things like runtime alias versioning
3271 whose condition can end up using those invariants. */
3272 pe = single_pred_edge (gimple_bb (preds->last ()));
3275 /* Now all statements are if-convertible. Combine all the basic
3276 blocks into one huge basic block doing the if-conversion
3277 on-the-fly. */
3278 combine_blocks (loop, pe);
3280 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3281 and stores are involved. CSE only the loop body, not the entry
3282 PHIs, those are to be kept in sync with the non-if-converted copy.
3283 ??? We'll still keep dead stores though. */
3284 exit_bbs = BITMAP_ALLOC (NULL);
3285 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3286 bitmap_set_bit (exit_bbs, loop->latch->index);
3288 std::pair <tree, tree> *name_pair;
3289 unsigned ssa_names_idx;
3290 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3291 replace_uses_by (name_pair->first, name_pair->second);
3292 redundant_ssa_names.release ();
3294 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3296 /* Delete dead predicate computations. */
3297 ifcvt_local_dce (loop);
3298 BITMAP_FREE (exit_bbs);
3300 todo |= TODO_cleanup_cfg;
3302 cleanup:
3303 if (ifc_bbs)
3305 unsigned int i;
3307 for (i = 0; i < loop->num_nodes; i++)
3308 free_bb_predicate (ifc_bbs[i]);
3310 free (ifc_bbs);
3311 ifc_bbs = NULL;
3313 if (rloop != NULL)
3315 loop = rloop;
3316 goto again;
3319 return todo;
3322 /* Tree if-conversion pass management. */
3324 namespace {
3326 const pass_data pass_data_if_conversion =
3328 GIMPLE_PASS, /* type */
3329 "ifcvt", /* name */
3330 OPTGROUP_NONE, /* optinfo_flags */
3331 TV_TREE_LOOP_IFCVT, /* tv_id */
3332 ( PROP_cfg | PROP_ssa ), /* properties_required */
3333 0, /* properties_provided */
3334 0, /* properties_destroyed */
3335 0, /* todo_flags_start */
3336 0, /* todo_flags_finish */
3339 class pass_if_conversion : public gimple_opt_pass
3341 public:
3342 pass_if_conversion (gcc::context *ctxt)
3343 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3346 /* opt_pass methods: */
3347 virtual bool gate (function *);
3348 virtual unsigned int execute (function *);
3350 }; // class pass_if_conversion
3352 bool
3353 pass_if_conversion::gate (function *fun)
3355 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3356 && flag_tree_loop_if_convert != 0)
3357 || flag_tree_loop_if_convert == 1);
3360 unsigned int
3361 pass_if_conversion::execute (function *fun)
3363 unsigned todo = 0;
3365 if (number_of_loops (fun) <= 1)
3366 return 0;
3368 auto_vec<gimple *> preds;
3369 for (auto loop : loops_list (cfun, 0))
3370 if (flag_tree_loop_if_convert == 1
3371 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3372 && !loop->dont_vectorize))
3373 todo |= tree_if_conversion (loop, &preds);
3375 if (todo)
3377 free_numbers_of_iterations_estimates (fun);
3378 scev_reset ();
3381 if (flag_checking)
3383 basic_block bb;
3384 FOR_EACH_BB_FN (bb, fun)
3385 gcc_assert (!bb->aux);
3388 /* Perform IL update now, it might elide some loops. */
3389 if (todo & TODO_cleanup_cfg)
3391 cleanup_tree_cfg ();
3392 if (need_ssa_update_p (fun))
3393 todo |= TODO_update_ssa;
3395 if (todo & TODO_update_ssa_any)
3396 update_ssa (todo & TODO_update_ssa_any);
3398 /* If if-conversion elided the loop fall back to the original one. */
3399 for (unsigned i = 0; i < preds.length (); ++i)
3401 gimple *g = preds[i];
3402 if (!gimple_bb (g))
3403 continue;
3404 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3405 if (!get_loop (fun, ifcvt_loop))
3407 if (dump_file)
3408 fprintf (dump_file, "If-converted loop vanished\n");
3409 fold_loop_internal_call (g, boolean_false_node);
3413 return 0;
3416 } // anon namespace
3418 gimple_opt_pass *
3419 make_pass_if_conversion (gcc::context *ctxt)
3421 return new pass_if_conversion (ctxt);