/* If-conversion for vectorizer.
   Copyright (C) 2004-2022 Free Software Foundation, Inc.
   Contributed by Devang Patel <dpatel@apple.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass implements a tree level if-conversion of loops.  Its
   initial goal is to help the vectorizer to vectorize loops with
   conditions.

   A short description of if-conversion:

     o Decide if a loop is if-convertible or not.
     o Walk all loop basic blocks in breadth first order (BFS order).
       o Remove conditional statements (at the end of basic block)
         and propagate condition into destination basic blocks'
         predicate list.
       o Replace modify expression with conditional modify expression
         using current basic block's condition.
     o Merge all basic blocks
       o Replace phi nodes with conditional modify expr
       o Merge all basic blocks into header

   Sample transformation:

   INPUT
   -----

   # i_23 = PHI <0(0), i_18(10)>;
   <L0>:;
   j_15 = A[i_23];
   if (j_15 > 41) goto <L1>; else goto <L17>;

   <L17>:;
   goto <bb 3> (<L3>);

   <L1>:;

   # iftmp.2_4 = PHI <0(8), 42(2)>;
   <L3>:;
   A[i_23] = iftmp.2_4;
   i_18 = i_23 + 1;
   if (i_18 <= 15) goto <L19>; else goto <L18>;

   <L19>:;
   goto <bb 1> (<L0>);

   <L18>:;

   OUTPUT
   ------

   # i_23 = PHI <0(0), i_18(10)>;
   <L0>:;
   j_15 = A[i_23];

   <L3>:;
   iftmp.2_4 = j_15 > 41 ? 42 : 0;
   A[i_23] = iftmp.2_4;
   i_18 = i_23 + 1;
   if (i_18 <= 15) goto <L19>; else goto <L18>;

   <L19>:;
   goto <bb 1> (<L0>);

   <L18>:;  */
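/* For orientation only (a hypothetical source-level view, not taken from the
   GCC testsuite): the GIMPLE above roughly corresponds to a loop like

     for (int i = 0; i <= 15; i++)
       if (A[i] > 41)
         A[i] = 42;
       else
         A[i] = 0;

   which if-conversion rewrites into the branch-free form

     for (int i = 0; i <= 15; i++)
       A[i] = A[i] > 41 ? 42 : 0;

   so that the vectorizer sees a single straight-line loop body.  */
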
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-query.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-address.h"
#include "dbgcnt.h"
#include "tree-hash-traits.h"
#include "varasm.h"
#include "builtins.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "fold-const.h"
#include "tree-ssa-sccvn.h"
#include "tree-cfgcleanup.h"
#include "tree-ssa-dse.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"

/* Only handle PHIs with no more than MAX_PHI_ARG_NUM arguments unless we
   are asked to by the simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)
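
/* Illustration only (assuming the usual command-line spelling of this
   parameter): the limit can be adjusted per invocation, e.g.

     gcc -O3 -ftree-loop-vectorize \
         --param max-tree-if-conversion-phi-args=6 file.c

   PHIs with more arguments than the limit are only considered when
   if-conversion is requested explicitly, e.g. by the simd pragma.  */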

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false, so that they don't once they are always
   executed.  See predicate_statements for the kinds of predication we
   support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  A complicated PHI has more than two arguments and can't
   be degenerated to a two-argument PHI.  See more information in the
   comment before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* Hash for struct innermost_loop_behavior.  It depends on the user to
   free the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &,
			    const compare_type &);
};

inline hashval_t
innermost_loop_behavior_hash::hash (const value_type &e)
{
  hashval_t hash;

  hash = iterative_hash_expr (e->base_address, 0);
  hash = iterative_hash_expr (e->offset, hash);
  hash = iterative_hash_expr (e->init, hash);
  return iterative_hash_expr (e->step, hash);
}

inline bool
innermost_loop_behavior_hash::equal (const value_type &e1,
				     const compare_type &e2)
{
  if ((e1->base_address && !e2->base_address)
      || (!e1->base_address && e2->base_address)
      || (!e1->offset && e2->offset)
      || (e1->offset && !e2->offset)
      || (!e1->init && e2->init)
      || (e1->init && !e2->init)
      || (!e1->step && e2->step)
      || (e1->step && !e2->step))
    return false;

  if (e1->base_address && e2->base_address
      && !operand_equal_p (e1->base_address, e2->base_address, 0))
    return false;
  if (e1->offset && e2->offset
      && !operand_equal_p (e1->offset, e2->offset, 0))
    return false;
  if (e1->init && e2->init
      && !operand_equal_p (e1->init, e2->init, 0))
    return false;
  if (e1->step && e2->step
      && !operand_equal_p (e1->step, e2->step, 0))
    return false;

  return true;
}

/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;

/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */
struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;
};

/* Returns true when the basic block BB has a predicate.  */

static inline bool
bb_has_predicate (basic_block bb)
{
  return bb->aux != NULL;
}

/* Returns the gimplified predicate for basic block BB.  */

static inline tree
bb_predicate (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate;
}

/* Sets the gimplified predicate COND for basic block BB.  */

static inline void
set_bb_predicate (basic_block bb, tree cond)
{
  gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
	       && is_gimple_val (TREE_OPERAND (cond, 0)))
	      || is_gimple_val (cond));
  ((struct bb_predicate *) bb->aux)->predicate = cond;
}

/* Returns the sequence of statements of the gimplification of the
   predicate for basic block BB.  */

static inline gimple_seq
bb_predicate_gimplified_stmts (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
}

/* Sets the sequence of statements STMTS of the gimplification of the
   predicate for basic block BB.  */

static inline void
set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
}

/* Adds the sequence of statements STMTS to the sequence of statements
   of the predicate for basic block BB.  */

static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ???  This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (stmts);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      delink_stmt_imm_use (stmt);
      gimple_set_modified (stmt, true);
    }
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}

/* Initializes to TRUE the predicate of basic block BB.  */

static inline void
init_bb_predicate (basic_block bb)
{
  bb->aux = XNEW (struct bb_predicate);
  set_bb_predicate_gimplified_stmts (bb, NULL);
  set_bb_predicate (bb, boolean_true_node);
}

/* Release the SSA_NAMEs associated with the predicate of basic block BB.  */

static inline void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.  */
      if (flag_checking)
	for (gimple_stmt_iterator i = gsi_start (stmts);
	     !gsi_end_p (i); gsi_next (&i))
	  gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL);
    }
}

/* Free the predicate of basic block BB.  */

static inline void
free_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    return;

  release_bb_predicate (bb);
  free (bb->aux);
  bb->aux = NULL;
}

/* Reinitialize predicate of BB with the true predicate.  */

static inline void
reset_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    init_bb_predicate (bb);
  else
    {
      release_bb_predicate (bb);
      set_bb_predicate (bb, boolean_true_node);
    }
}

/* Returns a new SSA_NAME of type TYPE that is assigned the value of
   the expression EXPR.  Inserts the statement created for this
   computation before GSI and leaves the iterator GSI at the same
   statement.  */

static tree
ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
{
  tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
  gimple *stmt = gimple_build_assign (new_name, expr);
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return new_name;
}

/* Return true when COND is a false predicate.  */

static inline bool
is_false_predicate (tree cond)
{
  return (cond != NULL_TREE
	  && (cond == boolean_false_node
	      || integer_zerop (cond)));
}

/* Return true when COND is a true predicate.  */

static inline bool
is_true_predicate (tree cond)
{
  return (cond == NULL_TREE
	  || cond == boolean_true_node
	  || integer_onep (cond));
}

/* Returns true when BB has a predicate that is not trivial: true or
   NULL_TREE.  */

static inline bool
is_predicated (basic_block bb)
{
  return !is_true_predicate (bb_predicate (bb));
}

/* Parses the predicate COND and returns its comparison code and
   operands OP0 and OP1.  */

static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (s);
	  *op1 = gimple_assign_rhs2 (s);
	  return gimple_assign_rhs_code (s);
	}
      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
	{
	  tree op = gimple_assign_rhs1 (s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  return ERROR_MARK;
}
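
/* For illustration only (hypothetical GIMPLE): given

     _5 = a_2 < b_3;

   parse_predicate (_5, &op0, &op1) follows the defining statement and
   returns LT_EXPR with op0 = a_2 and op1 = b_3; a predicate wrapped in a
   TRUTH_NOT_EXPR is parsed recursively and its comparison code is inverted,
   e.g. yielding GE_EXPR here when NaNs need not be honored.  */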

/* Returns the fold of predicate C1 OR C2 at location LOC.  */

static tree
fold_or_predicates (location_t loc, tree c1, tree c2)
{
  tree op1a, op1b, op2a, op2b;
  enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
  enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);

  if (code1 != ERROR_MARK && code2 != ERROR_MARK)
    {
      tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
					  code2, op2a, op2b);
      if (t)
	return t;
    }

  return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
}
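
/* For illustration only (hypothetical integer operands): OR-ing the two arms
   of a diamond, say "i_1 < n_2" and "i_1 >= n_2", lets
   maybe_fold_or_comparisons collapse the result to boolean true, so the join
   block ends up unpredicated instead of carrying the expression
   "i_1 < n_2 || i_1 >= n_2".  */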

/* Returns either a COND_EXPR or the folded expression if the folded
   expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
   a constant or a SSA_NAME.  */

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  /* If COND is the comparison r != 0 and r has boolean type, convert COND
     to an SSA_NAME so it is accepted by the vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }

  gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
			 type, cond, rhs, lhs);
  if (cexpr.resimplify (NULL, follow_all_ssa_edges))
    {
      if (gimple_simplified_result_is_gimple_val (&cexpr))
	return cexpr.ops[0];
      else if (cexpr.code == ABS_EXPR)
	return build1 (ABS_EXPR, type, cexpr.ops[0]);
      else if (cexpr.code == MIN_EXPR
	       || cexpr.code == MAX_EXPR)
	return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
    }

  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
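
/* For illustration only (hypothetical operands): with TYPE "int",
   COND "a_1 < b_2", RHS "b_2" and LHS "a_1", resimplification folds the
   would-be COND_EXPR "a_1 < b_2 ? b_2 : a_1" into "MAX_EXPR <a_1, b_2>";
   when both arms are the same value the result degenerates to that value
   and no conditional expression is built at all.  */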

/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  if (!is_gimple_val (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}

/* Add the condition COND to the previous condition PREV_COND, and add
   this to the predicate list of the destination of edge E.  LOOP is
   the loop to be if-converted.  */

static void
add_to_dst_predicate_list (class loop *loop, edge e,
			   tree prev_cond, tree cond)
{
  if (!flow_bb_inside_loop_p (loop, e->dest))
    return;

  if (!is_true_predicate (prev_cond))
    cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			prev_cond, cond);

  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
    add_to_predicate_list (loop, e->dest, cond);
}

/* Return true if one of the successor edges of BB exits LOOP.  */

static bool
bb_with_exit_edge_p (class loop *loop, basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (loop_exit_edge_p (loop, e))
      return true;

  return false;
}

/* Given PHI which has more than two arguments, this function checks if
   it's if-convertible by degenerating its arguments.  Specifically, if
   the below two conditions are satisfied:

     1) The number of PHI arguments with different values equals 2 and one
	argument occurs only once.
     2) The edge corresponding to the unique argument isn't a critical edge.

   Such a PHI can be handled as if it had only two arguments.  For example,
   the below PHI:

     res = PHI <A_1(e1), A_1(e2), A_2(e3)>;

   can be transformed into:

     res = (predicate of e3) ? A_2 : A_1;

   Return TRUE if it is the case, FALSE otherwise.  */

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	return false;
    }

  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}

/* Return true when PHI is if-convertible.  PHI is part of loop LOOP
   and it belongs to basic block BB.  Note at this point, it is sure
   that PHI is if-convertible.  This function updates global variable
   ANY_COMPLICATED_PHI if PHI is complicated.  */

static bool
if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
    }

  if (bb != loop->header
      && gimple_phi_num_args (phi) > 2
      && !phi_convertible_by_degenerating_args (phi))
    any_complicated_phi = true;

  return true;
}

/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr {
  bool rw_unconditionally;
  bool w_unconditionally;
  bool written_at_least_once;

  tree rw_predicate;
  tree w_predicate;
  tree base_w_predicate;
};

#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)

/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For the base reference it checks if it is written at least
   once unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.  */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{
  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;
  if (DR_IS_WRITE (a))
    {
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
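
/* For illustration only (hypothetical loop body): if A[i_2] is written under
   predicate "c_5 != 0" in one block and under "c_5 == 0" in another, the two
   write predicates OR up to true, so the master DR for A[i_2] gets
   DR_W_UNCONDITIONALLY set and ifcvt_memrefs_wont_trap can later treat the
   conditional stores as safe to execute unconditionally.  */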

/* Return TRUE if we can prove that the index IDX of an array reference REF
   is within the array bound.  Return false otherwise.  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays at the end of the structure, we are not guaranteed that they
     do not really extend over their declared size.  However, for arrays of
     size greater than one, this is unlikely to be intended.  */
  if (array_at_struct_end_p (ref))
    return false;

  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
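
/* Worked example, for illustration only (hypothetical loop): for an access
   A[i_3] into "int A[16]" where i_3 starts at 0 and steps by 1, we get
   low = 0, high = 15, init = 0, step = 1, hence delta = 15 and
   valid_niter = 15.  If max_loop_iterations can prove niter <= 15, every
   iteration stays within the declared bound and the reference may be
   treated as if it were accessed unconditionally.  */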

/* Return TRUE if REF is a within-bound array reference.  */

static bool
ref_within_array_bound (gimple *stmt, tree ref)
{
  class loop *loop = loop_containing_stmt (stmt);

  gcc_assert (loop != NULL);
  return for_each_index (&ref, idx_within_array_bound, loop);
}


/* Given a memory reference expression T, return TRUE if the base object
   it refers to is writable.  The base object of a memory reference
   is the main object being referenced, which is returned by function
   get_base_address.  */

static bool
base_object_writable (tree ref)
{
  tree base_tree = get_base_address (ref);

  return (base_tree
	  && DECL_P (base_tree)
	  && decl_binds_to_current_def_p (base_tree)
	  && !TREE_READONLY (base_tree));
}

/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT if the same memory
   reference is read or written unconditionally at least once and the base
   memory reference is written unconditionally once.  This checks that the
   reference cannot fault on a write.  It also returns true if the memory
   reference is unconditionally read once: then we are conditionally
   writing to memory which is defined as read and write and is bound to
   the definition we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
	      || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even if a is a conditional access, we can treat it as an unconditional
     one if it's an array reference and all its indices are within the array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditional write won't trap if the base is written
	 to unconditionally.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return flag_store_data_races;
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return flag_store_data_races;
    }

  return false;
}

/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb predicate).  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  /* Check whether this is a load or store.  */
  tree lhs = gimple_assign_lhs (stmt);
  bool is_load;
  tree ref;
  if (gimple_store_p (stmt))
    {
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
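
/* For illustration only (hypothetical GIMPLE; exact dump syntax may differ):
   on a target where can_vec_mask_load_store_p holds for the access mode,
   the predicated load

     if (c_4 != 0)
       x_5 = A[i_2];

   can later be emitted as an internal-function call along the lines of

     x_5 = .MASK_LOAD (&A[i_2], align, c_4);

   so memory is only touched where the predicate is true.  */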

/* Return true if STMT could be converted from an operation that is
   unconditional to one that is conditional on a bb predicate mask.  */

static bool
ifcvt_can_predicate (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);

  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || gimple_has_volatile_ops (stmt))
    return false;

  if (gimple_assign_single_p (stmt))
    return ifcvt_can_use_mask_load_store (stmt);

  tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  if (!types_compatible_p (lhs_type, rhs_type))
    return false;
  internal_fn cond_fn = get_conditional_internal_fn (code);
  return (cond_fn != IFN_LAST
	  && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
}
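
/* For illustration only (hypothetical GIMPLE): a division

     x_6 = y_4 / z_5;

   that is executed only under some predicate maps via
   get_conditional_internal_fn (TRUNC_DIV_EXPR) to IFN_COND_DIV, and when the
   target supports the vectorized form the statement can later be predicated
   as a conditional division instead of being executed unconditionally,
   which could otherwise trap on a zero divisor.  */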

/* Return true when STMT is if-convertible.

   A GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not a var decl.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constraints might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_predicate (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }
  else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	    || POINTER_TYPE_P (TREE_TYPE (lhs)))
	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
	   && arith_code_with_undefined_signed_overflow
		(gimple_assign_rhs_code (stmt)))
    /* We have to rewrite stmts with undefined overflow.  */
    need_to_rewrite_undefined = true;

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}

/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is a call to a builtin function.  */

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      return true;

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
	tree fndecl = gimple_call_fndecl (stmt);
	if (fndecl)
	  {
	    int flags = gimple_call_flags (stmt);
	    if ((flags & ECF_CONST)
		&& !(flags & ECF_LOOPING_CONST_OR_PURE)
		/* We can only vectorize some builtins at the moment,
		   so restrict if-conversion to those.  */
		&& fndecl_built_in_p (fndecl))
	      return true;
	  }
	return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "don't know what to do\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      return false;
    }
}

/* Assumes that BB has more than 1 predecessor.
   Returns false if at least one predecessor edge of BB is not a critical
   edge and true otherwise.  */

static inline bool
all_preds_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) == 1)
      return false;
  return true;
}

/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  gimple *last = last_stmt (bb);
  if (gcall *call = safe_dyn_cast <gcall *> (last))
    if (gimple_call_ctrl_altering_p (call))
      return false;

  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "latch is not dominated by exit_block\n");
	  return false;
	}
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}

/* Return true when all predecessor blocks of BB are visited.  The
   VISITED bitmap keeps track of the visited blocks.  */

static bool
pred_blocks_visited_p (basic_block bb, bitmap *visited)
{
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!bitmap_bit_p (*visited, e->src->index))
      return false;

  return true;
}

/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
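
/* For illustration only (hypothetical CFG): for a diamond

     header -> { then_bb, else_bb } -> join_bb -> latch

   a plain BFS might visit join_bb before else_bb; the constraint above
   delays join_bb until both of its predecessors have been selected, so the
   order becomes header, then_bb, else_bb, join_bb, latch and every block's
   predicate only depends on already-processed predecessors.  */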

/* Returns true when the analysis of the predicates for all the basic
   blocks in LOOP succeeded.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  tree c;
	  /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
	     conditions can remain unfolded because of multiple uses so
	     try to re-fold here, especially to get precision changing
	     conversions sorted out.  Do not simply fold the stmt since
	     this is analysis only.  When conditions were embedded in
	     COND_EXPRs those were folded separately before folding the
	     COND_EXPR but as they are now outside we have to make sure
	     to fold them.  Do it here - another opportunity would be to
	     fold predicates as they are inserted.  */
	  gimple_match_op cexpr (gimple_match_cond::UNCOND,
				 gimple_cond_code (stmt),
				 boolean_type_node,
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt));
	  if (cexpr.resimplify (NULL, follow_all_ssa_edges)
	      && cexpr.code.is_tree_code ()
	      && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
	    c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
			    cexpr.ops[0], cexpr.ops[1]);
	  else
	    c = build2_loc (loc, gimple_cond_code (stmt),
			    boolean_type_node,
			    gimple_cond_lhs (stmt),
			    gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}

/* Build region by adding loop pre-header and post-header blocks.  */

static vec<basic_block>
build_region (class loop *loop)
{
  vec<basic_block> region = vNULL;
  basic_block exit_bb = NULL;

  gcc_assert (ifc_bbs);
  /* The first element is loop pre-header.  */
  region.safe_push (loop_preheader_edge (loop)->src);

  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      region.safe_push (bb);
      /* Find loop postheader.  */
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (loop_exit_edge_p (loop, e))
	  {
	    exit_bb = e->dest;
	    break;
	  }
    }
  /* The last element is loop post-header.  */
  gcc_assert (exit_bb);
  region.safe_push (exit_bb);
  return region;
}

/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
    return false;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Allow statements that can be handled during if-conversion.  */
  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      return false;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  innermost_DR_map
    = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}

/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.  */

static bool
if_convertible_loop_p (class loop *loop)
{
  edge e;
  edge_iterator ei;
  bool res = false;
  vec<data_reference_p> refs;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  refs.create (5);
  res = if_convertible_loop_p_1 (loop, &refs);

  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    free (dr->aux);

  free_data_refs (refs);

  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}

/* Return reduc_1 if has_nop.

   if (...)
     tmp1 = (unsigned type) reduc_1;
     tmp2 = tmp1 + rhs2;
     reduc_3 = (signed type) tmp2.  */
static tree
strip_nop_cond_scalar_reduction (bool has_nop, tree op)
{
  if (!has_nop)
    return op;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
  if (!stmt
      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
      || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
				 (gimple_assign_rhs1 (stmt))))
    return NULL_TREE;

  return gimple_assign_rhs1 (stmt);
}

/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searched for:
      loop-header:
	reduc_1 = PHI <..., reduc_2>

	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are corresponding PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended, bool* has_nop,
			  gimple **nop_reduc)
{
  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  class loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = *has_nop = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);

  /* Catch something like below

     loop-header:
       reduc_1 = PHI <..., reduc_2>

     if (...)
       tmp1 = (unsigned type) reduc_1;
       tmp2 = tmp1 + rhs2;
       reduc_3 = (signed type) tmp2;

     reduc_2 = PHI <reduc_1, reduc_3>

     and convert to

     reduc_2 = PHI <0, reduc_3>
     tmp1 = (unsigned type)reduce_1;
     ifcvt = cond_expr ? rhs2 : 0
     tmp2 = tmp1 +/- ifcvt;
     reduce_1 = (signed type)tmp2;  */

  if (CONVERT_EXPR_CODE_P (reduction_op))
    {
      lhs = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (lhs) != SSA_NAME
	  || !has_single_use (lhs))
	return false;

      *nop_reduc = stmt;
      stmt = SSA_NAME_DEF_STMT (lhs);
      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
	  || !is_gimple_assign (stmt))
	return false;

      *has_nop = true;
      reduction_op = gimple_assign_rhs_code (stmt);
    }

  if (reduction_op != PLUS_EXPR
      && reduction_op != MINUS_EXPR
      && reduction_op != MULT_EXPR
      && reduction_op != BIT_IOR_EXPR
      && reduction_op != BIT_XOR_EXPR
      && reduction_op != BIT_AND_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_nop2 == PHI_RESULT (header_phi)
      && commutative_tree_code (reduction_op))
    {
      std::swap (r_op1, r_op2);
      std::swap (r_nop1, r_nop2);
    }
  else if (r_nop1 != PHI_RESULT (header_phi))
    return false;

  if (*has_nop)
    {
      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
	    continue;
	  if (use_stmt != phi)
	    return false;
	}
    }

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1;  *op1 = r_op2;
  *reduc = stmt;
  return true;
}

/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap,
			       bool has_nop, gimple* nop_reduc)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
  tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
  gimple_seq stmts = NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? op_nochange : op1,
			    swap ? op1 : op_nochange);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement/logic_operation.  */
  rhs = gimple_build (&stmts, reduction_op,
		      TREE_TYPE (rhs1), op0, tmp);

  if (has_nop)
    {
      rhs = gimple_convert (&stmts,
			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
      stmt_it = gsi_for_stmt (nop_reduc);
      gsi_remove (&stmt_it, true);
      release_defs (nop_reduc);
    }
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}

/* Produce condition for all occurrences of ARG in PHI node.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
		       gimple_stmt_iterator *gsi)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      if (is_true_predicate (c))
	{
	  cond = c;
	  break;
	}
      c = force_gimple_operand_gsi (gsi, unshare_expr (c),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
					   NULL_TREE, true, GSI_SAME_STMT);
	}
      else
	cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}
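
/* For illustration only (hypothetical PHI): if the same value x_1 reaches
   the join block over two edges whose source blocks carry the predicates
   "a_2 < 0" and "a_2 > 10", the condition built for x_1 is the gimplified
   OR of the two; if any occurrence comes from an unpredicated block the
   condition collapses to true and the loop above stops early.  */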
1904 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1905 This routine can handle PHI nodes with more than two arguments.
1907 For example,
1908 S1: A = PHI <x1(1), x2(5)>
1909 is converted into,
1910 S2: A = cond ? x1 : x2;
1912 The generated code is inserted at GSI that points to the top of
1913 basic block's statement list.
1914 If PHI node has more than two arguments a chain of conditional
1915 expression is produced. */
1918 static void
1919 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1921 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1922 tree rhs, res, arg0, arg1, op0, op1, scev;
1923 tree cond;
1924 unsigned int index0;
1925 unsigned int max, args_len;
1926 edge e;
1927 basic_block bb;
1928 unsigned int i;
1929 bool has_nop;
1931 res = gimple_phi_result (phi);
1932 if (virtual_operand_p (res))
1933 return;
1935 if ((rhs = degenerate_phi_result (phi))
1936 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1937 res))
1938 && !chrec_contains_undetermined (scev)
1939 && scev != res
1940 && (rhs = gimple_phi_arg_def (phi, 0))))
1942 if (dump_file && (dump_flags & TDF_DETAILS))
1944 fprintf (dump_file, "Degenerate phi!\n");
1945 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1947 new_stmt = gimple_build_assign (res, rhs);
1948 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1949 update_stmt (new_stmt);
1950 return;
1953 bb = gimple_bb (phi);
1954 if (EDGE_COUNT (bb->preds) == 2)
1956 /* Predicate ordinary PHI node with 2 arguments. */
1957 edge first_edge, second_edge;
1958 basic_block true_bb;
1959 first_edge = EDGE_PRED (bb, 0);
1960 second_edge = EDGE_PRED (bb, 1);
1961 cond = bb_predicate (first_edge->src);
1962 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1963 std::swap (first_edge, second_edge);
1964 if (EDGE_COUNT (first_edge->src->succs) > 1)
1966 cond = bb_predicate (second_edge->src);
1967 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1968 cond = TREE_OPERAND (cond, 0);
1969 else
1970 first_edge = second_edge;
1972 else
1973 cond = bb_predicate (first_edge->src);
1974 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1975 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1976 NULL_TREE, true, GSI_SAME_STMT);
1977 true_bb = first_edge->src;
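/* ARG0 is the PHI argument coming from TRUE_BB, i.e. the value selected
   when COND is true.  */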
1978 if (EDGE_PRED (bb, 1)->src == true_bb)
1980 arg0 = gimple_phi_arg_def (phi, 1);
1981 arg1 = gimple_phi_arg_def (phi, 0);
1983 else
1985 arg0 = gimple_phi_arg_def (phi, 0);
1986 arg1 = gimple_phi_arg_def (phi, 1);
1988 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1989 &op0, &op1, false, &has_nop,
1990 &nop_reduc))
1992 /* Convert reduction stmt into vectorizable form. */
1993 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1994 true_bb != gimple_bb (reduc),
1995 has_nop, nop_reduc);
1996 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
1998 else
1999 /* Build new RHS using selected condition and arguments. */
2000 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2001 arg0, arg1);
2002 new_stmt = gimple_build_assign (res, rhs);
2003 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2004 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2005 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2007 new_stmt = gsi_stmt (new_gsi);
2008 update_stmt (new_stmt);
2011 if (dump_file && (dump_flags & TDF_DETAILS))
2013 fprintf (dump_file, "new phi replacement stmt\n");
2014 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2016 return;
2019 /* Create a hashmap for the PHI node that maps each distinct argument value
2020 to the vector of argument indexes having that value. */
2021 bool swap = false;
2022 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2023 unsigned int num_args = gimple_phi_num_args (phi);
2024 int max_ind = -1;
2025 /* Vector of different PHI argument values. */
2026 auto_vec<tree> args (num_args);
2028 /* Compute phi_arg_map. */
2029 for (i = 0; i < num_args; i++)
2031 tree arg;
2033 arg = gimple_phi_arg_def (phi, i);
2034 if (!phi_arg_map.get (arg))
2035 args.quick_push (arg);
2036 phi_arg_map.get_or_insert (arg).safe_push (i);
2039 /* Determine element with max number of occurrences. */
2040 max_ind = -1;
2041 max = 1;
2042 args_len = args.length ();
2043 for (i = 0; i < args_len; i++)
2045 unsigned int len;
2046 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2048 max_ind = (int) i;
2049 max = len;
2053 /* Put the element with the max number of occurrences at the end of ARGS. */
2054 if (max_ind != -1 && max_ind +1 != (int) args_len)
2055 std::swap (args[args_len - 1], args[max_ind]);
2057 /* Handle the special case where the number of distinct argument values
2058 is 2 and one of the arguments occurs only once. Such a PHI can be
2059 handled as if it had only 2 arguments. */
2060 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2062 vec<int> *indexes;
2063 indexes = phi_arg_map.get (args[0]);
2064 index0 = (*indexes)[0];
2065 arg0 = args[0];
2066 arg1 = args[1];
2067 e = gimple_phi_arg_edge (phi, index0);
2068 cond = bb_predicate (e->src);
2069 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2071 swap = true;
2072 cond = TREE_OPERAND (cond, 0);
2074 /* Gimplify the condition to a valid cond-expr conditional operand. */
2075 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2076 NULL_TREE, true, GSI_SAME_STMT);
2077 if (!(is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2078 &op0, &op1, true, &has_nop, &nop_reduc)))
2079 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2080 swap ? arg1 : arg0,
2081 swap ? arg0 : arg1);
2082 else
2084 /* Convert reduction stmt into vectorizable form. */
2085 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2086 swap, has_nop, nop_reduc);
2087 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2089 new_stmt = gimple_build_assign (res, rhs);
2090 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2091 update_stmt (new_stmt);
2093 else
2095 /* Common case: build a chain of COND_EXPRs, one per distinct argument value. */
2096 vec<int> *indexes;
2097 tree type = TREE_TYPE (gimple_phi_result (phi));
2098 tree lhs;
2099 arg1 = args[1];
2100 for (i = 0; i < args_len; i++)
2102 arg0 = args[i];
2103 indexes = phi_arg_map.get (args[i]);
2104 if (i != args_len - 1)
2105 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2106 else
2107 lhs = res;
2108 cond = gen_phi_arg_condition (phi, indexes, gsi);
2109 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2110 arg0, arg1);
2111 new_stmt = gimple_build_assign (lhs, rhs);
2112 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2113 update_stmt (new_stmt);
2114 arg1 = lhs;
2118 if (dump_file && (dump_flags & TDF_DETAILS))
2120 fprintf (dump_file, "new extended phi replacement stmt\n");
2121 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2125 /* Replaces in LOOP all the scalar phi nodes other than those in the
2126 LOOP->header block with conditional modify expressions. */
2128 static void
2129 predicate_all_scalar_phis (class loop *loop)
2131 basic_block bb;
2132 unsigned int orig_loop_num_nodes = loop->num_nodes;
2133 unsigned int i;
2135 for (i = 1; i < orig_loop_num_nodes; i++)
2137 gphi *phi;
2138 gimple_stmt_iterator gsi;
2139 gphi_iterator phi_gsi;
2140 bb = ifc_bbs[i];
2142 if (bb == loop->header)
2143 continue;
2145 phi_gsi = gsi_start_phis (bb);
2146 if (gsi_end_p (phi_gsi))
2147 continue;
2149 gsi = gsi_after_labels (bb);
2150 while (!gsi_end_p (phi_gsi))
2152 phi = phi_gsi.phi ();
2153 if (virtual_operand_p (gimple_phi_result (phi)))
2154 gsi_next (&phi_gsi);
2155 else
2157 predicate_scalar_phi (phi, &gsi);
2158 remove_phi_node (&phi_gsi, false);
2164 /* Insert in each basic block of LOOP the statements produced by the
2165 gimplification of the predicates. */
2167 static void
2168 insert_gimplified_predicates (loop_p loop)
2170 unsigned int i;
2172 for (i = 0; i < loop->num_nodes; i++)
2174 basic_block bb = ifc_bbs[i];
2175 gimple_seq stmts;
2176 if (!is_predicated (bb))
2177 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2178 if (!is_predicated (bb))
2180 /* Do not insert statements for a basic block that is not
2181 predicated. Also make sure that the predicate of the
2182 basic block is set to true. */
2183 reset_bb_predicate (bb);
2184 continue;
2187 stmts = bb_predicate_gimplified_stmts (bb);
2188 if (stmts)
2190 if (need_to_predicate)
2192 /* Insert the predicate of the BB just after the label,
2193 as the if-conversion of memory writes will use this
2194 predicate. */
2195 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2196 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2198 else
2200 /* Insert the predicate of the BB at the end of the BB
2201 as this would reduce the register pressure: the only
2202 use of this predicate will be in successor BBs. */
2203 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2205 if (gsi_end_p (gsi)
2206 || stmt_ends_bb_p (gsi_stmt (gsi)))
2207 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2208 else
2209 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2212 /* Once the sequence is code generated, set it to NULL. */
2213 set_bb_predicate_gimplified_stmts (bb, NULL);
2218 /* Helper function for predicate_statements. Returns the index of an
2219 existing mask created for the given SIZE, or -1 otherwise. */
2221 static int
2222 mask_exists (int size, const vec<int> &vec)
2224 unsigned int ix;
2225 int v;
2226 FOR_EACH_VEC_ELT (vec, ix, v)
2227 if (v == size)
2228 return (int) ix;
2229 return -1;
2232 /* Helper function for predicate_statements. STMT is a memory read or
2233 write and it needs to be predicated by MASK. Return a statement
2234 that does so. */
2236 static gimple *
2237 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2239 gcall *new_stmt;
2241 tree lhs = gimple_assign_lhs (stmt);
2242 tree rhs = gimple_assign_rhs1 (stmt);
2243 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2244 mark_addressable (ref);
2245 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2246 true, NULL_TREE, true, GSI_SAME_STMT);
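/* The pointer argument of the masked load/store call carries the alias
   pointer type of the reference and its known alignment.  */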
2247 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2248 get_object_alignment (ref));
2249 /* Copy points-to info if possible. */
2250 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2251 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2252 ref);
2253 if (TREE_CODE (lhs) == SSA_NAME)
2255 new_stmt
2256 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2257 ptr, mask);
2258 gimple_call_set_lhs (new_stmt, lhs);
2259 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2261 else
2263 new_stmt
2264 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2265 mask, rhs);
2266 gimple_move_vops (new_stmt, stmt);
2268 gimple_call_set_nothrow (new_stmt, true);
2269 return new_stmt;
2272 /* STMT uses OP_LHS. Check whether it is equivalent to:
2274 ... = OP_MASK ? OP_LHS : X;
2276 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2277 known to have value OP_COND. */
2279 static tree
2280 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2281 tree op_lhs)
2283 gassign *assign = dyn_cast <gassign *> (stmt);
2284 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2285 return NULL_TREE;
2287 tree use_cond = gimple_assign_rhs1 (assign);
2288 tree if_true = gimple_assign_rhs2 (assign);
2289 tree if_false = gimple_assign_rhs3 (assign);
2291 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2292 && if_true == op_lhs)
2293 return if_false;
2295 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2296 return if_true;
2298 return NULL_TREE;
2301 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2302 the set of SSA names defined earlier in STMT's block. */
2304 static bool
2305 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2306 tree value)
2308 if (is_gimple_min_invariant (value))
2309 return true;
2311 if (TREE_CODE (value) == SSA_NAME)
2313 if (SSA_NAME_IS_DEFAULT_DEF (value))
2314 return true;
2316 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2317 basic_block use_bb = gimple_bb (stmt);
2318 return (def_bb == use_bb
2319 ? ssa_names->contains (value)
2320 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2323 return false;
2326 /* Helper function for predicate_statements. STMT is a potentially-trapping
2327 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2328 has value COND. Return a statement that does so. SSA_NAMES is the set of
2329 SSA names defined earlier in STMT's block. */
2331 static gimple *
2332 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2333 hash_set<tree_ssa_name_hash> *ssa_names)
2335 tree lhs = gimple_assign_lhs (stmt);
2336 tree_code code = gimple_assign_rhs_code (stmt);
2337 unsigned int nops = gimple_num_ops (stmt);
2338 internal_fn cond_fn = get_conditional_internal_fn (code);
2340 /* Construct the arguments to the conditional internal function. */
2341 auto_vec<tree, 8> args;
2342 args.safe_grow (nops + 1, true);
2343 args[0] = mask;
2344 for (unsigned int i = 1; i < nops; ++i)
2345 args[i] = gimple_op (stmt, i);
2346 args[nops] = NULL_TREE;
2348 /* Look for uses of the result to see whether they are COND_EXPRs that can
2349 be folded into the conditional call. */
2350 imm_use_iterator imm_iter;
2351 gimple *use_stmt;
2352 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2354 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2355 if (new_else && value_available_p (stmt, ssa_names, new_else))
2357 if (!args[nops])
2358 args[nops] = new_else;
2359 if (operand_equal_p (new_else, args[nops], 0))
2361 /* We have:
2363 LHS = IFN_COND (MASK, ..., ELSE);
2364 X = MASK ? LHS : ELSE;
2366 which makes X equivalent to LHS. */
2367 tree use_lhs = gimple_assign_lhs (use_stmt);
2368 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2372 if (!args[nops])
2373 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2374 nops - 1, &args[1]);
2376 /* Create and insert the call. */
2377 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2378 gimple_call_set_lhs (new_stmt, lhs);
2379 gimple_call_set_nothrow (new_stmt, true);
2381 return new_stmt;
2384 /* Predicate each write to memory in LOOP.
2386 This function transforms control flow constructs containing memory
2387 writes of the form:
2389 | for (i = 0; i < N; i++)
2390 | if (cond)
2391 | A[i] = expr;
2393 into the following form that does not contain control flow:
2395 | for (i = 0; i < N; i++)
2396 | A[i] = cond ? expr : A[i];
2398 The original CFG looks like this:
2400 | bb_0
2401 | i = 0
2402 | end_bb_0
2404 | bb_1
2405 | if (i < N) goto bb_5 else goto bb_2
2406 | end_bb_1
2408 | bb_2
2409 | cond = some_computation;
2410 | if (cond) goto bb_3 else goto bb_4
2411 | end_bb_2
2413 | bb_3
2414 | A[i] = expr;
2415 | goto bb_4
2416 | end_bb_3
2418 | bb_4
2419 | goto bb_1
2420 | end_bb_4
2422 insert_gimplified_predicates inserts the computation of the COND
2423 expression at the beginning of the destination basic block:
2425 | bb_0
2426 | i = 0
2427 | end_bb_0
2429 | bb_1
2430 | if (i < N) goto bb_5 else goto bb_2
2431 | end_bb_1
2433 | bb_2
2434 | cond = some_computation;
2435 | if (cond) goto bb_3 else goto bb_4
2436 | end_bb_2
2438 | bb_3
2439 | cond = some_computation;
2440 | A[i] = expr;
2441 | goto bb_4
2442 | end_bb_3
2444 | bb_4
2445 | goto bb_1
2446 | end_bb_4
2448 predicate_statements then predicates the memory write as follows:
2450 | bb_0
2451 | i = 0
2452 | end_bb_0
2454 | bb_1
2455 | if (i < N) goto bb_5 else goto bb_2
2456 | end_bb_1
2458 | bb_2
2459 | if (cond) goto bb_3 else goto bb_4
2460 | end_bb_2
2462 | bb_3
2463 | cond = some_computation;
2464 | A[i] = cond ? expr : A[i];
2465 | goto bb_4
2466 | end_bb_3
2468 | bb_4
2469 | goto bb_1
2470 | end_bb_4
2472 and finally combine_blocks removes the basic block boundaries making
2473 the loop vectorizable:
2475 | bb_0
2476 | i = 0
2477 | if (i < N) goto bb_5 else goto bb_1
2478 | end_bb_0
2480 | bb_1
2481 | cond = some_computation;
2482 | A[i] = cond ? expr : A[i];
2483 | if (i < N) goto bb_5 else goto bb_4
2484 | end_bb_1
2486 | bb_4
2487 | goto bb_1
2488 | end_bb_4
2491 static void
2492 predicate_statements (loop_p loop)
2494 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2495 auto_vec<int, 1> vect_sizes;
2496 auto_vec<tree, 1> vect_masks;
2497 hash_set<tree_ssa_name_hash> ssa_names;
2499 for (i = 1; i < orig_loop_num_nodes; i++)
2501 gimple_stmt_iterator gsi;
2502 basic_block bb = ifc_bbs[i];
2503 tree cond = bb_predicate (bb);
2504 bool swap;
2505 int index;
2507 if (is_true_predicate (cond))
2508 continue;
2510 swap = false;
2511 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2513 swap = true;
2514 cond = TREE_OPERAND (cond, 0);
2517 vect_sizes.truncate (0);
2518 vect_masks.truncate (0);
2520 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2522 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2523 tree lhs;
2524 if (!stmt)
2526 else if (is_false_predicate (cond)
2527 && gimple_vdef (stmt))
2529 unlink_stmt_vdef (stmt);
2530 gsi_remove (&gsi, true);
2531 release_defs (stmt);
2532 continue;
2534 else if (gimple_plf (stmt, GF_PLF_2))
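/* GF_PLF_2 marks statements that need explicit predication, either as a
   masked load/store or as a conditional internal function call.  */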
2536 tree lhs = gimple_assign_lhs (stmt);
2537 tree mask;
2538 gimple *new_stmt;
2539 gimple_seq stmts = NULL;
2540 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2541 /* We checked before setting GF_PLF_2 that an equivalent
2542 integer mode exists. */
2543 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2544 if (!vect_sizes.is_empty ()
2545 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2546 /* Use created mask. */
2547 mask = vect_masks[index];
2548 else
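/* Otherwise compute the mask from the block predicate, negating it if
   the predicate was inverted, and remember it for reuse below.  */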
2550 if (COMPARISON_CLASS_P (cond))
2551 mask = gimple_build (&stmts, TREE_CODE (cond),
2552 boolean_type_node,
2553 TREE_OPERAND (cond, 0),
2554 TREE_OPERAND (cond, 1));
2555 else
2556 mask = cond;
2558 if (swap)
2560 tree true_val
2561 = constant_boolean_node (true, TREE_TYPE (mask));
2562 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2563 TREE_TYPE (mask), mask, true_val);
2565 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2567 /* Save mask and its size for further use. */
2568 vect_sizes.safe_push (bitsize);
2569 vect_masks.safe_push (mask);
2571 if (gimple_assign_single_p (stmt))
2572 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2573 else
2574 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2576 gsi_replace (&gsi, new_stmt, true);
2578 else if (((lhs = gimple_assign_lhs (stmt)), true)
2579 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2580 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2581 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2582 && arith_code_with_undefined_signed_overflow
2583 (gimple_assign_rhs_code (stmt)))
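/* The operation now executes unconditionally; rewrite it so that any
   signed overflow it may produce is well defined.  */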
2585 gsi_remove (&gsi, true);
2586 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2587 bool first = true;
2588 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2589 !gsi_end_p (gsi2);)
2591 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2592 gsi_remove (&gsi2, false);
2593 if (first)
2595 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2596 first = false;
2598 else
2599 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2602 else if (gimple_vdef (stmt))
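/* Predicate other stores by loading the old value and storing
   COND ? new : old, leaving memory unchanged when COND is false.  */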
2604 tree lhs = gimple_assign_lhs (stmt);
2605 tree rhs = gimple_assign_rhs1 (stmt);
2606 tree type = TREE_TYPE (lhs);
2608 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2609 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2610 if (swap)
2611 std::swap (lhs, rhs);
2612 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2613 NULL_TREE, true, GSI_SAME_STMT);
2614 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2615 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2616 update_stmt (stmt);
2618 lhs = gimple_get_lhs (gsi_stmt (gsi));
2619 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2620 ssa_names.add (lhs);
2621 gsi_next (&gsi);
2623 ssa_names.empty ();
2627 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2628 other than the exit and latch of the LOOP. Also reset the
2629 GIMPLE_DEBUG information. */
2631 static void
2632 remove_conditions_and_labels (loop_p loop)
2634 gimple_stmt_iterator gsi;
2635 unsigned int i;
2637 for (i = 0; i < loop->num_nodes; i++)
2639 basic_block bb = ifc_bbs[i];
2641 if (bb_with_exit_edge_p (loop, bb)
2642 || bb == loop->latch)
2643 continue;
2645 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2646 switch (gimple_code (gsi_stmt (gsi)))
2648 case GIMPLE_COND:
2649 case GIMPLE_LABEL:
2650 gsi_remove (&gsi, true);
2651 break;
2653 case GIMPLE_DEBUG:
2654 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2655 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2657 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2658 update_stmt (gsi_stmt (gsi));
2660 gsi_next (&gsi);
2661 break;
2663 default:
2664 gsi_next (&gsi);
2669 /* Combine all the basic blocks from LOOP into one or two super basic
2670 blocks. Replace PHI nodes with conditional modify expressions. */
2672 static void
2673 combine_blocks (class loop *loop)
2675 basic_block bb, exit_bb, merge_target_bb;
2676 unsigned int orig_loop_num_nodes = loop->num_nodes;
2677 unsigned int i;
2678 edge e;
2679 edge_iterator ei;
2681 remove_conditions_and_labels (loop);
2682 insert_gimplified_predicates (loop);
2683 predicate_all_scalar_phis (loop);
2685 if (need_to_predicate || need_to_rewrite_undefined)
2686 predicate_statements (loop);
2688 /* Merge basic blocks. */
2689 exit_bb = NULL;
2690 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2691 for (i = 0; i < orig_loop_num_nodes; i++)
2693 bb = ifc_bbs[i];
2694 predicated[i] = !is_true_predicate (bb_predicate (bb));
2695 free_bb_predicate (bb);
2696 if (bb_with_exit_edge_p (loop, bb))
2698 gcc_assert (exit_bb == NULL);
2699 exit_bb = bb;
2702 gcc_assert (exit_bb != loop->latch);
2704 merge_target_bb = loop->header;
2706 /* Get at the virtual def valid for uses starting at the first block
2707 we merge into the header. Without a virtual PHI the loop has the
2708 same virtual use on all stmts. */
2709 gphi *vphi = get_virtual_phi (loop->header);
2710 tree last_vdef = NULL_TREE;
2711 if (vphi)
2713 last_vdef = gimple_phi_result (vphi);
2714 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2715 ! gsi_end_p (gsi); gsi_next (&gsi))
2716 if (gimple_vdef (gsi_stmt (gsi)))
2717 last_vdef = gimple_vdef (gsi_stmt (gsi));
2719 for (i = 1; i < orig_loop_num_nodes; i++)
2721 gimple_stmt_iterator gsi;
2722 gimple_stmt_iterator last;
2724 bb = ifc_bbs[i];
2726 if (bb == exit_bb || bb == loop->latch)
2727 continue;
2729 /* We release virtual PHIs late because we have to propagate them
2730 out using the current VUSE. The def might be the one used
2731 after the loop. */
2732 vphi = get_virtual_phi (bb);
2733 if (vphi)
2735 /* When there are only loads inside the loop a stray virtual
2736 PHI merging the uses can appear; update last_vdef from
2737 it. */
2738 if (!last_vdef)
2739 last_vdef = gimple_phi_arg_def (vphi, 0);
2740 imm_use_iterator iter;
2741 use_operand_p use_p;
2742 gimple *use_stmt;
2743 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2745 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2746 SET_USE (use_p, last_vdef);
2748 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2749 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2750 gsi = gsi_for_stmt (vphi);
2751 remove_phi_node (&gsi, true);
2754 /* Make the stmts members of loop->header and clear range info from all
2755 stmts in BB, which is no longer executed conditionally on a predicate
2756 the info could have been derived from. */
2757 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2759 gimple *stmt = gsi_stmt (gsi);
2760 gimple_set_bb (stmt, merge_target_bb);
2761 /* Update virtual operands. */
2762 if (last_vdef)
2764 use_operand_p use_p = ssa_vuse_operand (stmt);
2765 if (use_p
2766 && USE_FROM_PTR (use_p) != last_vdef)
2767 SET_USE (use_p, last_vdef);
2768 if (gimple_vdef (stmt))
2769 last_vdef = gimple_vdef (stmt);
2771 else
2772 /* If this is the first load we arrive at, update last_vdef
2773 so we handle stray PHIs correctly. */
2774 last_vdef = gimple_vuse (stmt);
2775 if (predicated[i])
2777 ssa_op_iter i;
2778 tree op;
2779 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2780 reset_flow_sensitive_info (op);
2784 /* Update stmt list. */
2785 last = gsi_last_bb (merge_target_bb);
2786 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2787 set_bb_seq (bb, NULL);
2790 /* Fixup virtual operands in the exit block. */
2791 if (exit_bb
2792 && exit_bb != loop->header)
2794 /* We release virtual PHIs late because we have to propagate them
2795 out using the current VUSE. The def might be the one used
2796 after the loop. */
2797 vphi = get_virtual_phi (exit_bb);
2798 if (vphi)
2800 /* When there are only loads inside the loop a stray virtual
2801 PHI merging the uses can appear; update last_vdef from
2802 it. */
2803 if (!last_vdef)
2804 last_vdef = gimple_phi_arg_def (vphi, 0);
2805 imm_use_iterator iter;
2806 use_operand_p use_p;
2807 gimple *use_stmt;
2808 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2810 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2811 SET_USE (use_p, last_vdef);
2813 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2814 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2815 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2816 remove_phi_node (&gsi, true);
2820 /* Now remove all the edges in the loop, except for those from the exit
2821 block and delete the blocks we elided. */
2822 for (i = 1; i < orig_loop_num_nodes; i++)
2824 bb = ifc_bbs[i];
2826 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2828 if (e->src == exit_bb)
2829 ei_next (&ei);
2830 else
2831 remove_edge (e);
2834 for (i = 1; i < orig_loop_num_nodes; i++)
2836 bb = ifc_bbs[i];
2838 if (bb == exit_bb || bb == loop->latch)
2839 continue;
2841 delete_basic_block (bb);
2844 /* Re-connect the exit block. */
2845 if (exit_bb != NULL)
2847 if (exit_bb != loop->header)
2849 /* Connect this node to loop header. */
2850 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2851 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2854 /* Redirect non-exit edges to loop->latch. */
2855 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2857 if (!loop_exit_edge_p (loop, e))
2858 redirect_edge_and_branch (e, loop->latch);
2860 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2862 else
2864 /* If the loop does not have an exit, reconnect header and latch. */
2865 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2866 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2869 /* If possible, merge loop header to the block with the exit edge.
2870 This reduces the number of basic blocks to two, to please the
2871 vectorizer that handles only loops with two nodes. */
2872 if (exit_bb
2873 && exit_bb != loop->header)
2875 if (can_merge_blocks_p (loop->header, exit_bb))
2876 merge_blocks (loop->header, exit_bb);
2879 free (ifc_bbs);
2880 ifc_bbs = NULL;
2881 free (predicated);
2884 /* Version LOOP before if-converting it; the original loop
2885 will be if-converted, the new copy of the loop will not,
2886 and the LOOP_VECTORIZED internal call will be guarding which
2887 loop to execute. The vectorizer pass will fold this
2888 internal call into either true or false.
2890 Note that this function intentionally invalidates profile. Both edges
2891 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2892 consistent after the condition is folded in the vectorizer. */
2894 static class loop *
2895 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2897 basic_block cond_bb;
2898 tree cond = make_ssa_name (boolean_type_node);
2899 class loop *new_loop;
2900 gimple *g;
2901 gimple_stmt_iterator gsi;
2902 unsigned int save_length;
2904 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2905 build_int_cst (integer_type_node, loop->num),
2906 integer_zero_node);
2907 gimple_call_set_lhs (g, cond);
2909 /* Save BB->aux around loop_version as that uses the same field. */
2910 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2911 void **saved_preds = XALLOCAVEC (void *, save_length);
2912 for (unsigned i = 0; i < save_length; i++)
2913 saved_preds[i] = ifc_bbs[i]->aux;
2915 initialize_original_copy_tables ();
2916 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
2917 is re-merged in the vectorizer. */
2918 new_loop = loop_version (loop, cond, &cond_bb,
2919 profile_probability::always (),
2920 profile_probability::always (),
2921 profile_probability::always (),
2922 profile_probability::always (), true);
2923 free_original_copy_tables ();
2925 for (unsigned i = 0; i < save_length; i++)
2926 ifc_bbs[i]->aux = saved_preds[i];
2928 if (new_loop == NULL)
2929 return NULL;
2931 new_loop->dont_vectorize = true;
2932 new_loop->force_vectorize = false;
2933 gsi = gsi_last_bb (cond_bb);
2934 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2935 if (preds)
2936 preds->safe_push (g);
2937 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2938 update_ssa (TODO_update_ssa_no_phi);
2939 return new_loop;
2942 /* Return true when LOOP satisfies the following conditions that will
2943 allow it to be recognized by the vectorizer for outer-loop
2944 vectorization:
2945 - The loop is not the root node of the loop tree.
2946 - The loop has exactly one inner loop.
2947 - The loop has a single exit.
2948 - The loop header has a single successor, which is the inner
2949 loop header.
2950 - Each of the inner and outer loop latches has a single
2951 predecessor.
2952 - The loop exit block has a single predecessor, which is the
2953 inner loop's exit block. */
2955 static bool
2956 versionable_outer_loop_p (class loop *loop)
2958 if (!loop_outer (loop)
2959 || loop->dont_vectorize
2960 || !loop->inner
2961 || loop->inner->next
2962 || !single_exit (loop)
2963 || !single_succ_p (loop->header)
2964 || single_succ (loop->header) != loop->inner->header
2965 || !single_pred_p (loop->latch)
2966 || !single_pred_p (loop->inner->latch))
2967 return false;
2969 basic_block outer_exit = single_pred (loop->latch);
2970 basic_block inner_exit = single_pred (loop->inner->latch);
2972 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2973 return false;
2975 if (dump_file)
2976 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2978 return true;
2981 /* Perform splitting of critical edges. Skip splitting and return false
2982 if LOOP will not be converted because:
2984 - LOOP is not well formed.
2985 - LOOP has a PHI with more than MAX_PHI_ARG_NUM arguments.
2987 The last restriction applies only if AGGRESSIVE_IF_CONV is false. */
2989 static bool
2990 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
2992 basic_block *body;
2993 basic_block bb;
2994 unsigned int num = loop->num_nodes;
2995 unsigned int i;
2996 gimple *stmt;
2997 edge e;
2998 edge_iterator ei;
2999 auto_vec<edge> critical_edges;
3001 /* Loop is not well formed. */
3002 if (num <= 2 || loop->inner || !single_exit (loop))
3003 return false;
3005 body = get_loop_body (loop);
3006 for (i = 0; i < num; i++)
3008 bb = body[i];
3009 if (!aggressive_if_conv
3010 && phi_nodes (bb)
3011 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3013 if (dump_file && (dump_flags & TDF_DETAILS))
3014 fprintf (dump_file,
3015 "BB %d has complicated PHI with more than %u args.\n",
3016 bb->index, MAX_PHI_ARG_NUM);
3018 free (body);
3019 return false;
3021 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3022 continue;
3024 stmt = last_stmt (bb);
3025 /* Skip basic blocks not ending with conditional branch. */
3026 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3027 continue;
3029 FOR_EACH_EDGE (e, ei, bb->succs)
3030 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3031 critical_edges.safe_push (e);
3033 free (body);
3035 while (critical_edges.length () > 0)
3037 e = critical_edges.pop ();
3038 /* Don't split if bb can be predicated along non-critical edge. */
3039 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3040 split_edge (e);
3043 return true;
3046 /* Delete redundant statements produced by predication that would
3047 otherwise prevent loop vectorization. */
3049 static void
3050 ifcvt_local_dce (class loop *loop)
3052 gimple *stmt;
3053 gimple *stmt1;
3054 gimple *phi;
3055 gimple_stmt_iterator gsi;
3056 auto_vec<gimple *> worklist;
3057 enum gimple_code code;
3058 use_operand_p use_p;
3059 imm_use_iterator imm_iter;
3061 /* The loop has a single BB only. */
3062 basic_block bb = loop->header;
3063 tree latch_vdef = NULL_TREE;
3065 worklist.create (64);
3066 /* Consider all PHIs as live statements. */
3067 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3069 phi = gsi_stmt (gsi);
3070 gimple_set_plf (phi, GF_PLF_2, true);
3071 worklist.safe_push (phi);
3072 if (virtual_operand_p (gimple_phi_result (phi)))
3073 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3075 /* Consider load/store statements, CALL and COND as live. */
3076 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3078 stmt = gsi_stmt (gsi);
3079 if (is_gimple_debug (stmt))
3081 gimple_set_plf (stmt, GF_PLF_2, true);
3082 continue;
3084 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3086 gimple_set_plf (stmt, GF_PLF_2, true);
3087 worklist.safe_push (stmt);
3088 continue;
3090 code = gimple_code (stmt);
3091 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3093 gimple_set_plf (stmt, GF_PLF_2, true);
3094 worklist.safe_push (stmt);
3095 continue;
3097 gimple_set_plf (stmt, GF_PLF_2, false);
3099 if (code == GIMPLE_ASSIGN)
3101 tree lhs = gimple_assign_lhs (stmt);
3102 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3104 stmt1 = USE_STMT (use_p);
3105 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3107 gimple_set_plf (stmt, GF_PLF_2, true);
3108 worklist.safe_push (stmt);
3109 break;
3114 /* Propagate liveness through arguments of live stmt. */
3115 while (worklist.length () > 0)
3117 ssa_op_iter iter;
3118 use_operand_p use_p;
3119 tree use;
3121 stmt = worklist.pop ();
3122 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3124 use = USE_FROM_PTR (use_p);
3125 if (TREE_CODE (use) != SSA_NAME)
3126 continue;
3127 stmt1 = SSA_NAME_DEF_STMT (use);
3128 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3129 continue;
3130 gimple_set_plf (stmt1, GF_PLF_2, true);
3131 worklist.safe_push (stmt1);
3134 /* Delete dead statements. */
3135 gsi = gsi_last_bb (bb);
3136 while (!gsi_end_p (gsi))
3138 gimple_stmt_iterator gsiprev = gsi;
3139 gsi_prev (&gsiprev);
3140 stmt = gsi_stmt (gsi);
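/* Stores were conservatively marked live above; use DSE to delete those
   that are provably dead in the single remaining block.  */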
3141 if (gimple_store_p (stmt) && gimple_vdef (stmt))
3143 tree lhs = gimple_get_lhs (stmt);
3144 ao_ref write;
3145 ao_ref_init (&write, lhs);
3147 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3148 == DSE_STORE_DEAD)
3149 delete_dead_or_redundant_assignment (&gsi, "dead");
3150 gsi = gsiprev;
3151 continue;
3154 if (gimple_plf (stmt, GF_PLF_2))
3156 gsi = gsiprev;
3157 continue;
3159 if (dump_file && (dump_flags & TDF_DETAILS))
3161 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3162 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3164 gsi_remove (&gsi, true);
3165 release_defs (stmt);
3166 gsi = gsiprev;
3170 /* Return true if VALUE is already available on edge PE. */
3172 static bool
3173 ifcvt_available_on_edge_p (edge pe, tree value)
3175 if (is_gimple_min_invariant (value))
3176 return true;
3178 if (TREE_CODE (value) == SSA_NAME)
3180 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3181 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3182 return true;
3185 return false;
3188 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3189 edge PE. */
3191 static bool
3192 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3194 if (auto *call = dyn_cast<gcall *> (stmt))
3196 if (gimple_call_internal_p (call)
3197 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3198 return false;
3200 else if (auto *assign = dyn_cast<gassign *> (stmt))
3202 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3203 return false;
3205 else
3206 return false;
3208 if (gimple_has_side_effects (stmt)
3209 || gimple_could_trap_p (stmt)
3210 || stmt_could_throw_p (cfun, stmt)
3211 || gimple_vdef (stmt)
3212 || gimple_vuse (stmt))
3213 return false;
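/* All statement operands must be available at the insertion point:
   defined before edge PE, or loop-invariant when hoisting to the
   preheader.  */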
3215 int num_args = gimple_num_args (stmt);
3216 if (pe != loop_preheader_edge (loop))
3218 for (int i = 0; i < num_args; ++i)
3219 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3220 return false;
3222 else
3224 for (int i = 0; i < num_args; ++i)
3225 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3226 return false;
3229 return true;
3232 /* Hoist invariant statements from LOOP to edge PE. */
3234 static void
3235 ifcvt_hoist_invariants (class loop *loop, edge pe)
3237 gimple_stmt_iterator hoist_gsi = {};
3238 unsigned int num_blocks = loop->num_nodes;
3239 basic_block *body = get_loop_body (loop);
3240 for (unsigned int i = 0; i < num_blocks; ++i)
3241 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3243 gimple *stmt = gsi_stmt (gsi);
3244 if (ifcvt_can_hoist (loop, pe, stmt))
3246 /* Once we've hoisted one statement, insert other statements
3247 after it. */
3248 gsi_remove (&gsi, false);
3249 if (hoist_gsi.ptr)
3250 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3251 else
3253 gsi_insert_on_edge_immediate (pe, stmt);
3254 hoist_gsi = gsi_for_stmt (stmt);
3256 continue;
3258 gsi_next (&gsi);
3260 free (body);
3263 /* If-convert LOOP when it is legal. For the moment this pass has no
3264 profitability analysis. Returns non-zero todo flags when something
3265 changed. */
3267 unsigned int
3268 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3270 unsigned int todo = 0;
3271 bool aggressive_if_conv;
3272 class loop *rloop;
3273 bitmap exit_bbs;
3274 edge pe;
3276 again:
3277 rloop = NULL;
3278 ifc_bbs = NULL;
3279 need_to_predicate = false;
3280 need_to_rewrite_undefined = false;
3281 any_complicated_phi = false;
3283 /* Apply more aggressive if-conversion when the loop or its outer loop was
3284 marked with the simd pragma. When that's the case, we try to if-convert
3285 loops containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3286 aggressive_if_conv = loop->force_vectorize;
3287 if (!aggressive_if_conv)
3289 class loop *outer_loop = loop_outer (loop);
3290 if (outer_loop && outer_loop->force_vectorize)
3291 aggressive_if_conv = true;
3294 if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
3295 goto cleanup;
3297 if (!if_convertible_loop_p (loop)
3298 || !dbg_cnt (if_conversion_tree))
3299 goto cleanup;
3301 if ((need_to_predicate || any_complicated_phi)
3302 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3303 || loop->dont_vectorize))
3304 goto cleanup;
3306 /* The edge to insert invariant stmts on. */
3307 pe = loop_preheader_edge (loop);
3309 /* Since we have no cost model, always version loops unless the user
3310 specified -ftree-loop-if-convert or unless versioning is required.
3311 Either version this loop, or if the pattern is right for outer-loop
3312 vectorization, version the outer loop. In the latter case we will
3313 still if-convert the original inner loop. */
3314 if (need_to_predicate
3315 || any_complicated_phi
3316 || flag_tree_loop_if_convert != 1)
3318 class loop *vloop
3319 = (versionable_outer_loop_p (loop_outer (loop))
3320 ? loop_outer (loop) : loop);
3321 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3322 if (nloop == NULL)
3323 goto cleanup;
3324 if (vloop != loop)
3326 /* If versionable_outer_loop_p decided to version the
3327 outer loop, version also the inner loop of the non-vectorized
3328 loop copy. So we transform:
3329 loop1
3330 loop2
3331 into:
3332 if (LOOP_VECTORIZED (1, 3))
3334 loop1
3335 loop2
3337 else
3338 loop3 (copy of loop1)
3339 if (LOOP_VECTORIZED (4, 5))
3340 loop4 (copy of loop2)
3341 else
3342 loop5 (copy of loop4) */
3343 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3344 rloop = nloop->inner;
3346 else
3347 /* If we versioned the loop then make sure to insert invariant
3348 stmts before the .LOOP_VECTORIZED check since the vectorizer
3349 will re-use that for things like runtime alias versioning
3350 whose condition can end up using those invariants. */
3351 pe = single_pred_edge (gimple_bb (preds->last ()));
3354 /* Now all statements are if-convertible. Combine all the basic
3355 blocks into one huge basic block doing the if-conversion
3356 on-the-fly. */
3357 combine_blocks (loop);
3359 /* Perform local CSE; this especially helps the vectorizer analysis if loads
3360 and stores are involved. CSE only the loop body, not the entry
3361 PHIs, those are to be kept in sync with the non-if-converted copy.
3362 ??? We'll still keep dead stores though. */
3363 exit_bbs = BITMAP_ALLOC (NULL);
3364 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3365 bitmap_set_bit (exit_bbs, loop->latch->index);
3367 std::pair <tree, tree> *name_pair;
3368 unsigned ssa_names_idx;
3369 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3370 replace_uses_by (name_pair->first, name_pair->second);
3371 redundant_ssa_names.release ();
3373 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3375 /* Delete dead predicate computations. */
3376 ifcvt_local_dce (loop);
3377 BITMAP_FREE (exit_bbs);
3379 ifcvt_hoist_invariants (loop, pe);
3381 todo |= TODO_cleanup_cfg;
3383 cleanup:
3384 if (ifc_bbs)
3386 unsigned int i;
3388 for (i = 0; i < loop->num_nodes; i++)
3389 free_bb_predicate (ifc_bbs[i]);
3391 free (ifc_bbs);
3392 ifc_bbs = NULL;
3394 if (rloop != NULL)
3396 loop = rloop;
3397 goto again;
3400 return todo;
3403 /* Tree if-conversion pass management. */
3405 namespace {
3407 const pass_data pass_data_if_conversion =
3409 GIMPLE_PASS, /* type */
3410 "ifcvt", /* name */
3411 OPTGROUP_NONE, /* optinfo_flags */
3412 TV_TREE_LOOP_IFCVT, /* tv_id */
3413 ( PROP_cfg | PROP_ssa ), /* properties_required */
3414 0, /* properties_provided */
3415 0, /* properties_destroyed */
3416 0, /* todo_flags_start */
3417 0, /* todo_flags_finish */
3420 class pass_if_conversion : public gimple_opt_pass
3422 public:
3423 pass_if_conversion (gcc::context *ctxt)
3424 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3427 /* opt_pass methods: */
3428 bool gate (function *) final override;
3429 unsigned int execute (function *) final override;
3431 }; // class pass_if_conversion
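/* Run if-conversion when loop vectorization is enabled (or forced by a
   pragma) and -ftree-loop-if-convert is not disabled, or when
   -ftree-loop-if-convert was requested explicitly.  */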
3433 bool
3434 pass_if_conversion::gate (function *fun)
3436 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3437 && flag_tree_loop_if_convert != 0)
3438 || flag_tree_loop_if_convert == 1);
3441 unsigned int
3442 pass_if_conversion::execute (function *fun)
3444 unsigned todo = 0;
3446 if (number_of_loops (fun) <= 1)
3447 return 0;
3449 auto_vec<gimple *> preds;
3450 for (auto loop : loops_list (cfun, 0))
3451 if (flag_tree_loop_if_convert == 1
3452 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3453 && !loop->dont_vectorize))
3454 todo |= tree_if_conversion (loop, &preds);
3456 if (todo)
3458 free_numbers_of_iterations_estimates (fun);
3459 scev_reset ();
3462 if (flag_checking)
3464 basic_block bb;
3465 FOR_EACH_BB_FN (bb, fun)
3466 gcc_assert (!bb->aux);
3469 /* Perform IL update now; it might elide some loops. */
3470 if (todo & TODO_cleanup_cfg)
3472 cleanup_tree_cfg ();
3473 if (need_ssa_update_p (fun))
3474 todo |= TODO_update_ssa;
3476 if (todo & TODO_update_ssa_any)
3477 update_ssa (todo & TODO_update_ssa_any);
3479 /* If if-conversion elided the loop, fall back to the original one. */
3480 for (unsigned i = 0; i < preds.length (); ++i)
3482 gimple *g = preds[i];
3483 if (!gimple_bb (g))
3484 continue;
3485 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3486 if (!get_loop (fun, ifcvt_loop))
3488 if (dump_file)
3489 fprintf (dump_file, "If-converted loop vanished\n");
3490 fold_loop_internal_call (g, boolean_false_node);
3494 return 0;
3497 } // anon namespace
3499 gimple_opt_pass *
3500 make_pass_if_conversion (gcc::context *ctxt)
3502 return new pass_if_conversion (ctxt);