d: Add testcase from PR108962
[official-gcc.git] / gcc / tree-if-conv.cc
blobe342532a343a3c066142adeec5fdfaf736a653e5
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "expr.h"
95 #include "optabs-tree.h"
96 #include "gimple-pretty-print.h"
97 #include "alias.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
107 #include "cfgloop.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
114 #include "dbgcnt.h"
115 #include "tree-hash-traits.h"
116 #include "varasm.h"
117 #include "builtins.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
125 #include "tree-eh.h"
126 #include "cgraph.h"
128 /* For lang_hooks.types.type_for_mode. */
129 #include "langhooks.h"
/* Only handle PHIs with at most this many arguments, unless a simd
   pragma asks us to handle more.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)
/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicates whether there are any complicated PHIs that need to be
   handled in if-conversion.  A complicated PHI has more than two
   arguments and can't be degenerated to a two-argument PHI.  See the
   comment before phi_convertible_by_degenerating_args for details.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any if-converting to be done at all.  */
static bool need_to_ifcvt;
159 /* Hash for struct innermost_loop_behavior. It depends on the user to
160 free the memory. */
162 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
164 static inline hashval_t hash (const value_type &);
165 static inline bool equal (const value_type &,
166 const compare_type &);
169 inline hashval_t
170 innermost_loop_behavior_hash::hash (const value_type &e)
172 hashval_t hash;
174 hash = iterative_hash_expr (e->base_address, 0);
175 hash = iterative_hash_expr (e->offset, hash);
176 hash = iterative_hash_expr (e->init, hash);
177 return iterative_hash_expr (e->step, hash);
180 inline bool
181 innermost_loop_behavior_hash::equal (const value_type &e1,
182 const compare_type &e2)
184 if ((e1->base_address && !e2->base_address)
185 || (!e1->base_address && e2->base_address)
186 || (!e1->offset && e2->offset)
187 || (e1->offset && !e2->offset)
188 || (!e1->init && e2->init)
189 || (e1->init && !e2->init)
190 || (!e1->step && e2->step)
191 || (e1->step && !e2->step))
192 return false;
194 if (e1->base_address && e2->base_address
195 && !operand_equal_p (e1->base_address, e2->base_address, 0))
196 return false;
197 if (e1->offset && e2->offset
198 && !operand_equal_p (e1->offset, e2->offset, 0))
199 return false;
200 if (e1->init && e2->init
201 && !operand_equal_p (e1->init, e2->init, 0))
202 return false;
203 if (e1->step && e2->step
204 && !operand_equal_p (e1->step, e2->step, 0))
205 return false;
207 return true;
210 /* List of basic blocks in if-conversion-suitable order. */
211 static basic_block *ifc_bbs;
213 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
214 static hash_map<innermost_loop_behavior_hash,
215 data_reference_p> *innermost_DR_map;
217 /* Hash table to store <base reference, DR> pairs. */
218 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
220 /* List of redundant SSA names: the first should be replaced by the second. */
221 static vec< std::pair<tree, tree> > redundant_ssa_names;
223 /* Structure used to predicate basic blocks. This is attached to the
224 ->aux field of the BBs in the loop to be if-converted. */
225 struct bb_predicate {
227 /* The condition under which this basic block is executed. */
228 tree predicate;
230 /* PREDICATE is gimplified, and the sequence of statements is
231 recorded here, in order to avoid the duplication of computations
232 that occur in previous conditions. See PR44483. */
233 gimple_seq predicate_gimplified_stmts;
236 /* Returns true when the basic block BB has a predicate. */
238 static inline bool
239 bb_has_predicate (basic_block bb)
241 return bb->aux != NULL;
244 /* Returns the gimplified predicate for basic block BB. */
246 static inline tree
247 bb_predicate (basic_block bb)
249 return ((struct bb_predicate *) bb->aux)->predicate;
252 /* Sets the gimplified predicate COND for basic block BB. */
254 static inline void
255 set_bb_predicate (basic_block bb, tree cond)
257 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
258 && is_gimple_val (TREE_OPERAND (cond, 0)))
259 || is_gimple_val (cond));
260 ((struct bb_predicate *) bb->aux)->predicate = cond;
263 /* Returns the sequence of statements of the gimplification of the
264 predicate for basic block BB. */
266 static inline gimple_seq
267 bb_predicate_gimplified_stmts (basic_block bb)
269 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
272 /* Sets the sequence of statements STMTS of the gimplification of the
273 predicate for basic block BB. */
275 static inline void
276 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
278 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
281 /* Adds the sequence of statements STMTS to the sequence of statements
282 of the predicate for basic block BB. */
284 static inline void
285 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
287 /* We might have updated some stmts in STMTS via force_gimple_operand
288 calling fold_stmt and that producing multiple stmts. Delink immediate
289 uses so update_ssa after loop versioning doesn't get confused for
290 the not yet inserted predicates.
291 ??? This should go away once we reliably avoid updating stmts
292 not in any BB. */
293 for (gimple_stmt_iterator gsi = gsi_start (stmts);
294 !gsi_end_p (gsi); gsi_next (&gsi))
296 gimple *stmt = gsi_stmt (gsi);
297 delink_stmt_imm_use (stmt);
298 gimple_set_modified (stmt, true);
300 gimple_seq_add_seq_without_update
301 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
304 /* Initializes to TRUE the predicate of basic block BB. */
306 static inline void
307 init_bb_predicate (basic_block bb)
309 bb->aux = XNEW (struct bb_predicate);
310 set_bb_predicate_gimplified_stmts (bb, NULL);
311 set_bb_predicate (bb, boolean_true_node);
314 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
316 static inline void
317 release_bb_predicate (basic_block bb)
319 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
320 if (stmts)
322 /* Ensure that these stmts haven't yet been added to a bb. */
323 if (flag_checking)
324 for (gimple_stmt_iterator i = gsi_start (stmts);
325 !gsi_end_p (i); gsi_next (&i))
326 gcc_assert (! gimple_bb (gsi_stmt (i)));
328 /* Discard them. */
329 gimple_seq_discard (stmts);
330 set_bb_predicate_gimplified_stmts (bb, NULL);
334 /* Free the predicate of basic block BB. */
336 static inline void
337 free_bb_predicate (basic_block bb)
339 if (!bb_has_predicate (bb))
340 return;
342 release_bb_predicate (bb);
343 free (bb->aux);
344 bb->aux = NULL;
347 /* Reinitialize predicate of BB with the true predicate. */
349 static inline void
350 reset_bb_predicate (basic_block bb)
352 if (!bb_has_predicate (bb))
353 init_bb_predicate (bb);
354 else
356 release_bb_predicate (bb);
357 set_bb_predicate (bb, boolean_true_node);
361 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
362 the expression EXPR. Inserts the statement created for this
363 computation before GSI and leaves the iterator GSI at the same
364 statement. */
366 static tree
367 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
369 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
370 gimple *stmt = gimple_build_assign (new_name, expr);
371 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
372 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
373 return new_name;
376 /* Return true when COND is a false predicate. */
378 static inline bool
379 is_false_predicate (tree cond)
381 return (cond != NULL_TREE
382 && (cond == boolean_false_node
383 || integer_zerop (cond)));
386 /* Return true when COND is a true predicate. */
388 static inline bool
389 is_true_predicate (tree cond)
391 return (cond == NULL_TREE
392 || cond == boolean_true_node
393 || integer_onep (cond));
396 /* Returns true when BB has a predicate that is not trivial: true or
397 NULL_TREE. */
399 static inline bool
400 is_predicated (basic_block bb)
402 return !is_true_predicate (bb_predicate (bb));
405 /* Parses the predicate COND and returns its comparison code and
406 operands OP0 and OP1. */
408 static enum tree_code
409 parse_predicate (tree cond, tree *op0, tree *op1)
411 gimple *s;
413 if (TREE_CODE (cond) == SSA_NAME
414 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
416 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
418 *op0 = gimple_assign_rhs1 (s);
419 *op1 = gimple_assign_rhs2 (s);
420 return gimple_assign_rhs_code (s);
423 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
425 tree op = gimple_assign_rhs1 (s);
426 tree type = TREE_TYPE (op);
427 enum tree_code code = parse_predicate (op, op0, op1);
429 return code == ERROR_MARK ? ERROR_MARK
430 : invert_tree_comparison (code, HONOR_NANS (type));
433 return ERROR_MARK;
436 if (COMPARISON_CLASS_P (cond))
438 *op0 = TREE_OPERAND (cond, 0);
439 *op1 = TREE_OPERAND (cond, 1);
440 return TREE_CODE (cond);
443 return ERROR_MARK;
446 /* Returns the fold of predicate C1 OR C2 at location LOC. */
448 static tree
449 fold_or_predicates (location_t loc, tree c1, tree c2)
451 tree op1a, op1b, op2a, op2b;
452 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
453 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
455 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
457 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
458 code2, op2a, op2b);
459 if (t)
460 return t;
463 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
466 /* Returns either a COND_EXPR or the folded expression if the folded
467 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
468 a constant or a SSA_NAME. */
470 static tree
471 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
473 /* If COND is comparison r != 0 and r has boolean type, convert COND
474 to SSA_NAME to accept by vect bool pattern. */
475 if (TREE_CODE (cond) == NE_EXPR)
477 tree op0 = TREE_OPERAND (cond, 0);
478 tree op1 = TREE_OPERAND (cond, 1);
479 if (TREE_CODE (op0) == SSA_NAME
480 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
481 && (integer_zerop (op1)))
482 cond = op0;
485 gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
486 type, cond, rhs, lhs);
487 if (cexpr.resimplify (NULL, follow_all_ssa_edges))
489 if (gimple_simplified_result_is_gimple_val (&cexpr))
490 return cexpr.ops[0];
491 else if (cexpr.code == ABS_EXPR)
492 return build1 (ABS_EXPR, type, cexpr.ops[0]);
493 else if (cexpr.code == MIN_EXPR
494 || cexpr.code == MAX_EXPR)
495 return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
498 return build3 (COND_EXPR, type, cond, rhs, lhs);
501 /* Add condition NC to the predicate list of basic block BB. LOOP is
502 the loop to be if-converted. Use predicate of cd-equivalent block
503 for join bb if it exists: we call basic blocks bb1 and bb2
504 cd-equivalent if they are executed under the same condition. */
506 static inline void
507 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
509 tree bc, *tp;
510 basic_block dom_bb;
512 if (is_true_predicate (nc))
513 return;
515 /* If dominance tells us this basic block is always executed,
516 don't record any predicates for it. */
517 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
518 return;
520 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
521 /* We use notion of cd equivalence to get simpler predicate for
522 join block, e.g. if join block has 2 predecessors with predicates
523 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
524 p1 & p2 | p1 & !p2. */
525 if (dom_bb != loop->header
526 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
528 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
529 bc = bb_predicate (dom_bb);
530 if (!is_true_predicate (bc))
531 set_bb_predicate (bb, bc);
532 else
533 gcc_assert (is_true_predicate (bb_predicate (bb)));
534 if (dump_file && (dump_flags & TDF_DETAILS))
535 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
536 dom_bb->index, bb->index);
537 return;
540 if (!is_predicated (bb))
541 bc = nc;
542 else
544 bc = bb_predicate (bb);
545 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
546 if (is_true_predicate (bc))
548 reset_bb_predicate (bb);
549 return;
553 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
554 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
555 tp = &TREE_OPERAND (bc, 0);
556 else
557 tp = &bc;
558 if (!is_gimple_val (*tp))
560 gimple_seq stmts;
561 *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
562 add_bb_predicate_gimplified_stmts (bb, stmts);
564 set_bb_predicate (bb, bc);
567 /* Add the condition COND to the previous condition PREV_COND, and add
568 this to the predicate list of the destination of edge E. LOOP is
569 the loop to be if-converted. */
571 static void
572 add_to_dst_predicate_list (class loop *loop, edge e,
573 tree prev_cond, tree cond)
575 if (!flow_bb_inside_loop_p (loop, e->dest))
576 return;
578 if (!is_true_predicate (prev_cond))
579 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
580 prev_cond, cond);
582 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
583 add_to_predicate_list (loop, e->dest, cond);
586 /* Return true if one of the successor edges of BB exits LOOP. */
588 static bool
589 bb_with_exit_edge_p (class loop *loop, basic_block bb)
591 edge e;
592 edge_iterator ei;
594 FOR_EACH_EDGE (e, ei, bb->succs)
595 if (loop_exit_edge_p (loop, e))
596 return true;
598 return false;
601 /* Given PHI which has more than two arguments, this function checks if
602 it's if-convertible by degenerating its arguments. Specifically, if
603 below two conditions are satisfied:
605 1) Number of PHI arguments with different values equals to 2 and one
606 argument has the only occurrence.
607 2) The edge corresponding to the unique argument isn't critical edge.
609 Such PHI can be handled as PHIs have only two arguments. For example,
610 below PHI:
612 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
614 can be transformed into:
616 res = (predicate of e3) ? A_2 : A_1;
618 Return TRUE if it is the case, FALSE otherwise. */
620 static bool
621 phi_convertible_by_degenerating_args (gphi *phi)
623 edge e;
624 tree arg, t1 = NULL, t2 = NULL;
625 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
626 unsigned int num_args = gimple_phi_num_args (phi);
628 gcc_assert (num_args > 2);
630 for (i = 0; i < num_args; i++)
632 arg = gimple_phi_arg_def (phi, i);
633 if (t1 == NULL || operand_equal_p (t1, arg, 0))
635 n1++;
636 i1 = i;
637 t1 = arg;
639 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
641 n2++;
642 i2 = i;
643 t2 = arg;
645 else
646 return false;
649 if (n1 != 1 && n2 != 1)
650 return false;
652 /* Check if the edge corresponding to the unique arg is critical. */
653 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
654 if (EDGE_COUNT (e->src->succs) > 1)
655 return false;
657 return true;
660 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
661 and it belongs to basic block BB. Note at this point, it is sure
662 that PHI is if-convertible. This function updates global variable
663 ANY_COMPLICATED_PHI if PHI is complicated. */
665 static bool
666 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
668 if (dump_file && (dump_flags & TDF_DETAILS))
670 fprintf (dump_file, "-------------------------\n");
671 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
674 if (bb != loop->header
675 && gimple_phi_num_args (phi) > 2
676 && !phi_convertible_by_degenerating_args (phi))
677 any_complicated_phi = true;
679 return true;
682 /* Records the status of a data reference. This struct is attached to
683 each DR->aux field. */
685 struct ifc_dr {
686 bool rw_unconditionally;
687 bool w_unconditionally;
688 bool written_at_least_once;
690 tree rw_predicate;
691 tree w_predicate;
692 tree base_w_predicate;
695 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
696 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
697 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
698 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
700 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
701 HASH tables. While storing them in HASH table, it checks if the
702 reference is unconditionally read or written and stores that as a flag
703 information. For base reference it checks if it is written atlest once
704 unconditionally and stores it as flag information along with DR.
705 In other words for every data reference A in STMT there exist other
706 accesses to a data reference with the same base with predicates that
707 add up (OR-up) to the true predicate: this ensures that the data
708 reference A is touched (read or written) on every iteration of the
709 if-converted loop. */
710 static void
711 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
714 data_reference_p *master_dr, *base_master_dr;
715 tree base_ref = DR_BASE_OBJECT (a);
716 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
717 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
718 bool exist1, exist2;
720 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
721 if (!exist1)
722 *master_dr = a;
724 if (DR_IS_WRITE (a))
726 IFC_DR (*master_dr)->w_predicate
727 = fold_or_predicates (UNKNOWN_LOCATION, ca,
728 IFC_DR (*master_dr)->w_predicate);
729 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
730 DR_W_UNCONDITIONALLY (*master_dr) = true;
732 IFC_DR (*master_dr)->rw_predicate
733 = fold_or_predicates (UNKNOWN_LOCATION, ca,
734 IFC_DR (*master_dr)->rw_predicate);
735 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
736 DR_RW_UNCONDITIONALLY (*master_dr) = true;
738 if (DR_IS_WRITE (a))
740 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
741 if (!exist2)
742 *base_master_dr = a;
743 IFC_DR (*base_master_dr)->base_w_predicate
744 = fold_or_predicates (UNKNOWN_LOCATION, ca,
745 IFC_DR (*base_master_dr)->base_w_predicate);
746 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
747 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
751 /* Return TRUE if can prove the index IDX of an array reference REF is
752 within array bound. Return false otherwise. */
754 static bool
755 idx_within_array_bound (tree ref, tree *idx, void *dta)
757 wi::overflow_type overflow;
758 widest_int niter, valid_niter, delta, wi_step;
759 tree ev, init, step;
760 tree low, high;
761 class loop *loop = (class loop*) dta;
763 /* Only support within-bound access for array references. */
764 if (TREE_CODE (ref) != ARRAY_REF)
765 return false;
767 /* For arrays that might have flexible sizes, it is not guaranteed that they
768 do not extend over their declared size. */
769 if (array_ref_flexible_size_p (ref))
770 return false;
772 ev = analyze_scalar_evolution (loop, *idx);
773 ev = instantiate_parameters (loop, ev);
774 init = initial_condition (ev);
775 step = evolution_part_in_loop_num (ev, loop->num);
777 if (!init || TREE_CODE (init) != INTEGER_CST
778 || (step && TREE_CODE (step) != INTEGER_CST))
779 return false;
781 low = array_ref_low_bound (ref);
782 high = array_ref_up_bound (ref);
784 /* The case of nonconstant bounds could be handled, but it would be
785 complicated. */
786 if (TREE_CODE (low) != INTEGER_CST
787 || !high || TREE_CODE (high) != INTEGER_CST)
788 return false;
790 /* Check if the intial idx is within bound. */
791 if (wi::to_widest (init) < wi::to_widest (low)
792 || wi::to_widest (init) > wi::to_widest (high))
793 return false;
795 /* The idx is always within bound. */
796 if (!step || integer_zerop (step))
797 return true;
799 if (!max_loop_iterations (loop, &niter))
800 return false;
802 if (wi::to_widest (step) < 0)
804 delta = wi::to_widest (init) - wi::to_widest (low);
805 wi_step = -wi::to_widest (step);
807 else
809 delta = wi::to_widest (high) - wi::to_widest (init);
810 wi_step = wi::to_widest (step);
813 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
814 /* The iteration space of idx is within array bound. */
815 if (!overflow && niter <= valid_niter)
816 return true;
818 return false;
821 /* Return TRUE if ref is a within bound array reference. */
823 static bool
824 ref_within_array_bound (gimple *stmt, tree ref)
826 class loop *loop = loop_containing_stmt (stmt);
828 gcc_assert (loop != NULL);
829 return for_each_index (&ref, idx_within_array_bound, loop);
833 /* Given a memory reference expression T, return TRUE if base object
834 it refers to is writable. The base object of a memory reference
835 is the main object being referenced, which is returned by function
836 get_base_address. */
838 static bool
839 base_object_writable (tree ref)
841 tree base_tree = get_base_address (ref);
843 return (base_tree
844 && DECL_P (base_tree)
845 && decl_binds_to_current_def_p (base_tree)
846 && !TREE_READONLY (base_tree));
849 /* Return true when the memory references of STMT won't trap in the
850 if-converted code. There are two things that we have to check for:
852 - writes to memory occur to writable memory: if-conversion of
853 memory writes transforms the conditional memory writes into
854 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
855 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
856 be executed at all in the original code, it may be a readonly
857 memory. To check that A is not const-qualified, we check that
858 there exists at least an unconditional write to A in the current
859 function.
861 - reads or writes to memory are valid memory accesses for every
862 iteration. To check that the memory accesses are correctly formed
863 and that we are allowed to read and write in these locations, we
864 check that the memory accesses to be if-converted occur at every
865 iteration unconditionally.
867 Returns true for the memory reference in STMT, same memory reference
868 is read or written unconditionally atleast once and the base memory
869 reference is written unconditionally once. This is to check reference
870 will not write fault. Also retuns true if the memory reference is
871 unconditionally read once then we are conditionally writing to memory
872 which is defined as read and write and is bound to the definition
873 we are seeing. */
874 static bool
875 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
877 /* If DR didn't see a reference here we can't use it to tell
878 whether the ref traps or not. */
879 if (gimple_uid (stmt) == 0)
880 return false;
882 data_reference_p *master_dr, *base_master_dr;
883 data_reference_p a = drs[gimple_uid (stmt) - 1];
885 tree base = DR_BASE_OBJECT (a);
886 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
888 gcc_assert (DR_STMT (a) == stmt);
889 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
890 || DR_INIT (a) || DR_STEP (a));
892 master_dr = innermost_DR_map->get (innermost);
893 gcc_assert (master_dr != NULL);
895 base_master_dr = baseref_DR_map->get (base);
897 /* If a is unconditionally written to it doesn't trap. */
898 if (DR_W_UNCONDITIONALLY (*master_dr))
899 return true;
901 /* If a is unconditionally accessed then ...
903 Even a is conditional access, we can treat it as an unconditional
904 one if it's an array reference and all its index are within array
905 bound. */
906 if (DR_RW_UNCONDITIONALLY (*master_dr)
907 || ref_within_array_bound (stmt, DR_REF (a)))
909 /* an unconditional read won't trap. */
910 if (DR_IS_READ (a))
911 return true;
913 /* an unconditionaly write won't trap if the base is written
914 to unconditionally. */
915 if (base_master_dr
916 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
917 return flag_store_data_races;
918 /* or the base is known to be not readonly. */
919 else if (base_object_writable (DR_REF (a)))
920 return flag_store_data_races;
923 return false;
926 /* Return true if STMT could be converted into a masked load or store
927 (conditional load or store based on a mask computed from bb predicate). */
929 static bool
930 ifcvt_can_use_mask_load_store (gimple *stmt)
932 /* Check whether this is a load or store. */
933 tree lhs = gimple_assign_lhs (stmt);
934 bool is_load;
935 tree ref;
936 if (gimple_store_p (stmt))
938 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
939 return false;
940 is_load = false;
941 ref = lhs;
943 else if (gimple_assign_load_p (stmt))
945 is_load = true;
946 ref = gimple_assign_rhs1 (stmt);
948 else
949 return false;
951 if (may_be_nonaddressable_p (ref))
952 return false;
954 /* Mask should be integer mode of the same size as the load/store
955 mode. */
956 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
957 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
958 return false;
960 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
961 return true;
963 return false;
966 /* Return true if STMT could be converted from an operation that is
967 unconditional to one that is conditional on a bb predicate mask. */
969 static bool
970 ifcvt_can_predicate (gimple *stmt)
972 basic_block bb = gimple_bb (stmt);
974 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
975 || bb->loop_father->dont_vectorize
976 || gimple_has_volatile_ops (stmt))
977 return false;
979 if (gimple_assign_single_p (stmt))
980 return ifcvt_can_use_mask_load_store (stmt);
982 tree_code code = gimple_assign_rhs_code (stmt);
983 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
984 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
985 if (!types_compatible_p (lhs_type, rhs_type))
986 return false;
987 internal_fn cond_fn = get_conditional_internal_fn (code);
988 return (cond_fn != IFN_LAST
989 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
992 /* Return true when STMT is if-convertible.
994 GIMPLE_ASSIGN statement is not if-convertible if,
995 - it is not movable,
996 - it could trap,
997 - LHS is not var decl. */
999 static bool
1000 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1001 vec<data_reference_p> refs)
1003 tree lhs = gimple_assign_lhs (stmt);
1005 if (dump_file && (dump_flags & TDF_DETAILS))
1007 fprintf (dump_file, "-------------------------\n");
1008 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1011 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1012 return false;
1014 /* Some of these constrains might be too conservative. */
1015 if (stmt_ends_bb_p (stmt)
1016 || gimple_has_volatile_ops (stmt)
1017 || (TREE_CODE (lhs) == SSA_NAME
1018 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1019 || gimple_has_side_effects (stmt))
1021 if (dump_file && (dump_flags & TDF_DETAILS))
1022 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1023 return false;
1026 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1027 in between if_convertible_loop_p and combine_blocks
1028 we can perform loop versioning. */
1029 gimple_set_plf (stmt, GF_PLF_2, false);
1031 if ((! gimple_vuse (stmt)
1032 || gimple_could_trap_p_1 (stmt, false, false)
1033 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1034 && gimple_could_trap_p (stmt))
1036 if (ifcvt_can_predicate (stmt))
1038 gimple_set_plf (stmt, GF_PLF_2, true);
1039 need_to_predicate = true;
1040 return true;
1042 if (dump_file && (dump_flags & TDF_DETAILS))
1043 fprintf (dump_file, "tree could trap...\n");
1044 return false;
1046 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1047 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1048 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1049 && arith_code_with_undefined_signed_overflow
1050 (gimple_assign_rhs_code (stmt)))
1051 /* We have to rewrite stmts with undefined overflow. */
1052 need_to_rewrite_undefined = true;
1054 /* When if-converting stores force versioning, likewise if we
1055 ended up generating store data races. */
1056 if (gimple_vdef (stmt))
1057 need_to_predicate = true;
1059 return true;
/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is builtins call,
   - it is a call to a function with a SIMD clone.  */

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (stmt))
    {
    /* Labels, debug stmts and conditions carry no work that needs
       predication; they are always acceptable.  */
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      return true;

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
	tree fndecl = gimple_call_fndecl (stmt);
	if (fndecl)
	  {
	    /* We can vectorize some builtins and functions with SIMD
	       "inbranch" clones.  */
	    int flags = gimple_call_flags (stmt);
	    struct cgraph_node *node = cgraph_node::get (fndecl);
	    /* A const, non-looping builtin has no side effects and
	       cannot trap, so executing it unconditionally is safe.  */
	    if ((flags & ECF_CONST)
		&& !(flags & ECF_LOOPING_CONST_OR_PURE)
		&& fndecl_built_in_p (fndecl))
	      return true;
	    if (node && node->simd_clones != NULL)
	      /* Ensure that at least one clone can be "inbranch".  */
	      for (struct cgraph_node *n = node->simd_clones; n != NULL;
		   n = n->simdclone->next_clone)
		if (n->simdclone->inbranch)
		  {
		    /* Mark the call for predication (GF_PLF_2) and
		       request loop versioning via NEED_TO_PREDICATE.  */
		    gimple_set_plf (stmt, GF_PLF_2, true);
		    need_to_predicate = true;
		    return true;
		  }
	  }
	return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "don't know what to do\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      return false;
    }
}
1121 /* Assumes that BB has more than 1 predecessors.
1122 Returns false if at least one successor is not on critical edge
1123 and true otherwise. */
1125 static inline bool
1126 all_preds_critical_p (basic_block bb)
1128 edge e;
1129 edge_iterator ei;
1131 FOR_EACH_EDGE (e, ei, bb->preds)
1132 if (EDGE_COUNT (e->src->succs) == 1)
1133 return false;
1134 return true;
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* More than two successors means a switch-like structure we cannot
     turn into straight-line predicated code.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* A control-altering call (e.g. noreturn, setjmp-like) must keep its
     position at the end of a block; it cannot be predicated.  */
  if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
    if (gimple_call_ctrl_altering_p (call))
      return false;

  /* EXIT_BB is non-NULL once the caller has seen the exit block, so
     these checks constrain blocks that come after it in the walk.  */
  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "latch is not dominated by exit_block\n");
	  return false;
	}
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
1200 /* Return true when all predecessor blocks of BB are visited. The
1201 VISITED bitmap keeps track of the visited blocks. */
1203 static bool
1204 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1206 edge e;
1207 edge_iterator ei;
1208 FOR_EACH_EDGE (e, ei, bb->preds)
1209 if (!bitmap_bit_p (*visited, e->src->index))
1210 return false;
1212 return true;
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Irreducible regions cannot be if-converted; bail out and
	 release everything allocated so far.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  /* Select BB only once all its predecessors were selected;
	     the header is the entry and is always selectable.  */
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      /* Some blocks may have been deferred because a predecessor was
	 not yet selected; rescan from the start until all are placed.  */
      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
/* Returns true when the analysis of the predicates for all the basic
   blocks in LOOP succeeded.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  /* IFC_BBS is in if-conversion order, so each block's predecessors
     have their predicates computed before the block is reached.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  tree c;
	  /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
	     conditions can remain unfolded because of multiple uses so
	     try to re-fold here, especially to get precision changing
	     conversions sorted out.  Do not simply fold the stmt since
	     this is analysis only.  When conditions were embedded in
	     COND_EXPRs those were folded separately before folding the
	     COND_EXPR but as they are now outside we have to make sure
	     to fold them.  Do it here - another opportunity would be to
	     fold predicates as they are inserted.  */
	  gimple_match_op cexpr (gimple_match_cond::UNCOND,
				 gimple_cond_code (stmt),
				 boolean_type_node,
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt));
	  if (cexpr.resimplify (NULL, follow_all_ssa_edges)
	      && cexpr.code.is_tree_code ()
	      && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
	    c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
			    cexpr.ops[0], cexpr.ops[1]);
	  else
	    c = build2_loc (loc, gimple_cond_code (stmt),
			    boolean_type_node,
			    gimple_cond_lhs (stmt),
			    gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
/* Build region by adding loop pre-header and post-header blocks.
   The region is the span over which a local post-dominator tree is
   computed; callers must release the returned vector.  */

static vec<basic_block>
build_region (class loop *loop)
{
  vec<basic_block> region = vNULL;
  basic_block exit_bb = NULL;

  gcc_assert (ifc_bbs);
  /* The first element is loop pre-header.  */
  region.safe_push (loop_preheader_edge (loop)->src);

  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      region.safe_push (bb);
      /* Find loop postheader.  */
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (loop_exit_edge_p (loop, e))
	  {
	    exit_bb = e->dest;
	    break;
	  }
    }
  /* The last element is loop post-header.  A single-exit loop is a
     precondition of if-conversion, so EXIT_BB must have been found.  */
  gcc_assert (exit_bb);
  region.safe_push (exit_bb);
  return region;
}
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  calculate_dominance_info (CDI_DOMINATORS);

  /* First pass: check every block's CFG shape and locate the block
     holding the loop exit edge.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Second pass: reject unsupported statement kinds and clear stmt
     UIDs (they are reused below to number data references).  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      bool may_have_nonlocal_labels
	= bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	    if (!may_have_nonlocal_labels)
	      {
		tree label
		  = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
		if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
		  return false;
	      }
	    /* Fallthru.  */
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  /* These maps are deleted in if_convertible_loop_p.  */
  innermost_DR_map
    = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  /* Record, for each data reference, under which predicates it is
     read and written; statements are numbered via their UID.  */
  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.  */

static bool
if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
{
  edge e;
  edge_iterator ei;
  bool res = false;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  res = if_convertible_loop_p_1 (loop, refs);

  /* The maps were allocated in if_convertible_loop_p_1; tear them
     down regardless of the result.  */
  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}
/* Return reduc_1 if has_nop.

   if (...)
     tmp1 = (unsigned type) reduc_1;
     tmp2 = tmp1 + rhs2;
     reduc_3 = (signed type) tmp2.

   When HAS_NOP is false OP is returned unchanged; otherwise OP must be
   an SSA name defined by a value-preserving (nop) conversion, and the
   conversion's source operand is returned.  NULL_TREE on mismatch.  */

static tree
strip_nop_cond_scalar_reduction (bool has_nop, tree op)
{
  if (!has_nop)
    return op;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
  if (!stmt
      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
      || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
				 (gimple_assign_rhs1 (stmt))))
    return NULL_TREE;

  return gimple_assign_rhs1 (stmt);
}
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended, bool* has_nop,
			  gimple **nop_reduc)
{
  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  class loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = *has_nop = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* Identify which argument is the header PHI (the reduction carried
     value) and which is defined by the candidate reduction stmt.  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* The result of PHI must feed back into the header PHI through the
     latch edge to close the reduction cycle.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);

  /* Catch something like below

     loop-header:
     reduc_1 = PHI <..., reduc_2>
     ...
     if (...)
     tmp1 = (unsigned type) reduc_1;
     tmp2 = tmp1 + rhs2;
     reduc_3 = (signed type) tmp2;

     reduc_2 = PHI <reduc_1, reduc_3>

     and convert to

     reduc_2 = PHI <0, reduc_3>
     tmp1 = (unsigned type)reduce_1;
     ifcvt = cond_expr ? rhs2 : 0
     tmp2 = tmp1 +/- ifcvt;
     reduce_1 = (signed type)tmp2;  */

  if (CONVERT_EXPR_CODE_P (reduction_op))
    {
      /* Look through the final conversion to the real arithmetic.  */
      lhs = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (lhs) != SSA_NAME
	  || !has_single_use (lhs))
	return false;

      *nop_reduc = stmt;
      stmt = SSA_NAME_DEF_STMT (lhs);
      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
	  || !is_gimple_assign (stmt))
	return false;

      *has_nop = true;
      reduction_op = gimple_assign_rhs_code (stmt);
    }

  /* Only these operations have a neutral element to substitute on the
     not-taken path.  */
  if (reduction_op != PLUS_EXPR
      && reduction_op != MINUS_EXPR
      && reduction_op != MULT_EXPR
      && reduction_op != BIT_IOR_EXPR
      && reduction_op != BIT_XOR_EXPR
      && reduction_op != BIT_AND_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_nop2 == PHI_RESULT (header_phi)
      && commutative_tree_code (reduction_op))
    {
      std::swap (r_op1, r_op2);
      std::swap (r_nop1, r_nop2);
    }
  else if (r_nop1 != PHI_RESULT (header_phi))
    return false;

  if (*has_nop)
    {
      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
	    continue;
	  if (use_stmt != phi)
	    return false;
	}
    }

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap,
			       bool has_nop, gimple* nop_reduc)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
  /* OP_NOCHANGE is the neutral element of REDUCTION_OP (0 for +, 1
     for *, etc.), used on the path where the reduction is skipped.  */
  tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
  gimple_seq stmts = NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? op_nochange : op1,
			    swap ? op1 : op_nochange);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement/logic_operation.  */
  rhs = gimple_build (&stmts, reduction_op,
		      TREE_TYPE (rhs1), op0, tmp);

  if (has_nop)
    {
      /* Re-apply the stripped nop conversion and drop the now dead
	 conversion statement.  */
      rhs = gimple_convert (&stmts,
			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
      stmt_it = gsi_for_stmt (nop_reduc);
      gsi_remove (&stmt_it, true);
      release_defs (nop_reduc);
    }
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
/* Produce condition for all occurrences of ARG in PHI node.  Set *INVERT
   as to whether the condition is inverted.  OCCUR holds the indexes of
   the PHI arguments that carry the same value; the result is the OR of
   their incoming-edge predicates, gimplified at GSI.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
		       gimple_stmt_iterator *gsi, bool *invert)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  *invert = false;
  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      /* A "true" predicate absorbs the whole disjunction; stop here.  */
      if (is_true_predicate (c))
	{
	  cond = c;
	  break;
	}
      /* If we have just a single inverted predicate, signal that and
	 instead invert the COND_EXPR arms.  */
      if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
	{
	  c = TREE_OPERAND (c, 0);
	  *invert = true;
	}
      c = force_gimple_operand_gsi (gsi, unshare_expr (c),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
					   NULL_TREE, true, GSI_SAME_STMT);
	}
      else
	cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
   This routine can handle PHI nodes with more than two arguments.

   For example,
     S1: A = PHI <x1(1), x2(5)>
   is converted into,
     S2: A = cond ? x1 : x2;

   The generated code is inserted at GSI that points to the top of
   basic block's statement list.
   If PHI node has more than two arguments a chain of conditional
   expression is produced.  */


static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
  gimple *new_stmt = NULL, *reduc, *nop_reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  unsigned int max, args_len;
  edge e;
  basic_block bb;
  unsigned int i;
  bool has_nop;

  res = gimple_phi_result (phi);
  if (virtual_operand_p (res))
    return;

  /* Degenerate PHIs (all arguments equal, or a value SCEV can fully
     analyze) collapse to a plain copy.  */
  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
					    res))
	  && !chrec_contains_undetermined (scev)
	  && scev != res
	  && (rhs = gimple_phi_arg_def (phi, 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Degenerate phi!\n");
	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
      return;
    }

  bb = gimple_bb (phi);
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (first_edge->src);
      /* Prefer the non-negated predicate as the COND_EXPR condition.  */
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	std::swap (first_edge, second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
	{
	  cond = bb_predicate (second_edge->src);
	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	    cond = TREE_OPERAND (cond, 0);
	  else
	    first_edge = second_edge;
	}
      else
	cond = bb_predicate (first_edge->src);
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      /* ARG0 must be the value flowing in from TRUE_BB.  */
      if (EDGE_PRED (bb, 1)->src == true_bb)
	{
	  arg0 = gimple_phi_arg_def (phi, 1);
	  arg1 = gimple_phi_arg_def (phi, 0);
	}
      else
	{
	  arg0 = gimple_phi_arg_def (phi, 0);
	  arg1 = gimple_phi_arg_def (phi, 1);
	}
      if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				    &op0, &op1, false, &has_nop,
				    &nop_reduc))
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       true_bb != gimple_bb (reduc),
					       has_nop, nop_reduc);
	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
	}
      else
	/* Build new RHS using selected condition and arguments.  */
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    arg0, arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, follow_all_ssa_edges))
	{
	  new_stmt = gsi_stmt (new_gsi);
	  update_stmt (new_stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "new phi replacement stmt\n");
	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
	}
      return;
    }

  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (phi);
  int max_ind = -1;
  /* Vector of different PHI argument values.  */
  auto_vec<tree> args (num_args);

  /* Compute phi_arg_map.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (phi, i);
      if (!phi_arg_map.get (arg))
	args.quick_push (arg);
      phi_arg_map.get_or_insert (arg).safe_push (i);
    }

  /* Determine element with max number of occurrences.  */
  max_ind = -1;
  max = 1;
  args_len = args.length ();
  for (i = 0; i < args_len; i++)
    {
      unsigned int len;
      if ((len = phi_arg_map.get (args[i])->length ()) > max)
	{
	  max_ind = (int) i;
	  max = len;
	}
    }

  /* Put element with max number of occurences to the end of ARGS.  */
  if (max_ind != -1 && max_ind + 1 != (int) args_len)
    std::swap (args[args_len - 1], args[max_ind]);

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
    {
      vec<int> *indexes;
      indexes = phi_arg_map.get (args[0]);
      index0 = (*indexes)[0];
      arg0 = args[0];
      arg1 = args[1];
      e = gimple_phi_arg_edge (phi, index0);
      cond = bb_predicate (e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
				      &op0, &op1, true, &has_nop, &nop_reduc)))
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    swap? arg1 : arg0,
				    swap? arg0 : arg1);
      else
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       swap,has_nop, nop_reduc);
	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
    }
  else
    {
      /* Common case.  Build a chain of COND_EXPRs, one per distinct
	 argument value, ending in an assignment to RES.  */
      vec<int> *indexes;
      tree type = TREE_TYPE (gimple_phi_result (phi));
      tree lhs;
      arg1 = args[args_len - 1];
      for (i = args_len - 1; i > 0; i--)
	{
	  arg0 = args[i - 1];
	  indexes = phi_arg_map.get (args[i - 1]);
	  if (i != 1)
	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
	  else
	    lhs = res;
	  bool invert;
	  cond = gen_phi_arg_condition (phi, indexes, gsi, &invert);
	  if (invert)
	    rhs = fold_build_cond_expr (type, unshare_expr (cond),
					arg1, arg0);
	  else
	    rhs = fold_build_cond_expr (type, unshare_expr (cond),
					arg0, arg1);
	  new_stmt = gimple_build_assign (lhs, rhs);
	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
	  update_stmt (new_stmt);
	  arg1 = lhs;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "new extended phi replacement stmt\n");
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
   LOOP->header block with conditional modify expressions.  */

static void
predicate_all_scalar_phis (class loop *loop)
{
  basic_block bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;

  /* Start at 1: ifc_bbs[0] is the loop header, whose PHIs are kept.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gphi *phi;
      gimple_stmt_iterator gsi;
      gphi_iterator phi_gsi;
      bb = ifc_bbs[i];

      if (bb == loop->header)
	continue;

      phi_gsi = gsi_start_phis (bb);
      if (gsi_end_p (phi_gsi))
	continue;

      gsi = gsi_after_labels (bb);
      while (!gsi_end_p (phi_gsi))
	{
	  phi = phi_gsi.phi ();
	  /* Virtual PHIs are left in place; only scalar PHIs become
	     COND_EXPR assignments.  */
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    gsi_next (&phi_gsi);
	  else
	    {
	      predicate_scalar_phi (phi, &gsi);
	      /* remove_phi_node advances PHI_GSI to the next PHI.  */
	      remove_phi_node (&phi_gsi, false);
	    }
	}
    }
}
2186 /* Insert in each basic block of LOOP the statements produced by the
2187 gimplification of the predicates. */
2189 static void
2190 insert_gimplified_predicates (loop_p loop)
2192 unsigned int i;
2194 for (i = 0; i < loop->num_nodes; i++)
2196 basic_block bb = ifc_bbs[i];
2197 gimple_seq stmts;
2198 if (!is_predicated (bb))
2199 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2200 if (!is_predicated (bb))
2202 /* Do not insert statements for a basic block that is not
2203 predicated. Also make sure that the predicate of the
2204 basic block is set to true. */
2205 reset_bb_predicate (bb);
2206 continue;
2209 stmts = bb_predicate_gimplified_stmts (bb);
2210 if (stmts)
2212 if (need_to_predicate)
2214 /* Insert the predicate of the BB just after the label,
2215 as the if-conversion of memory writes will use this
2216 predicate. */
2217 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2218 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2220 else
2222 /* Insert the predicate of the BB at the end of the BB
2223 as this would reduce the register pressure: the only
2224 use of this predicate will be in successor BBs. */
2225 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2227 if (gsi_end_p (gsi)
2228 || stmt_ends_bb_p (gsi_stmt (gsi)))
2229 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2230 else
2231 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2234 /* Once the sequence is code generated, set it to NULL. */
2235 set_bb_predicate_gimplified_stmts (bb, NULL);
2240 /* Helper function for predicate_statements. Returns index of existent
2241 mask if it was created for given SIZE and -1 otherwise. */
2243 static int
2244 mask_exists (int size, const vec<int> &vec)
2246 unsigned int ix;
2247 int v;
2248 FOR_EACH_VEC_ELT (vec, ix, v)
2249 if (v == size)
2250 return (int) ix;
2251 return -1;
2254 /* Helper function for predicate_statements. STMT is a memory read or
2255 write and it needs to be predicated by MASK. Return a statement
2256 that does so. */
2258 static gimple *
2259 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2261 gcall *new_stmt;
2263 tree lhs = gimple_assign_lhs (stmt);
2264 tree rhs = gimple_assign_rhs1 (stmt);
2265 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2266 mark_addressable (ref);
2267 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2268 true, NULL_TREE, true, GSI_SAME_STMT);
2269 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2270 get_object_alignment (ref));
2271 /* Copy points-to info if possible. */
2272 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2273 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2274 ref);
2275 if (TREE_CODE (lhs) == SSA_NAME)
2277 new_stmt
2278 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2279 ptr, mask);
2280 gimple_call_set_lhs (new_stmt, lhs);
2281 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2283 else
2285 new_stmt
2286 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2287 mask, rhs);
2288 gimple_move_vops (new_stmt, stmt);
2290 gimple_call_set_nothrow (new_stmt, true);
2291 return new_stmt;
2294 /* STMT uses OP_LHS. Check whether it is equivalent to:
2296 ... = OP_MASK ? OP_LHS : X;
2298 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2299 known to have value OP_COND. */
2301 static tree
2302 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2303 tree op_lhs)
2305 gassign *assign = dyn_cast <gassign *> (stmt);
2306 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2307 return NULL_TREE;
2309 tree use_cond = gimple_assign_rhs1 (assign);
2310 tree if_true = gimple_assign_rhs2 (assign);
2311 tree if_false = gimple_assign_rhs3 (assign);
2313 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2314 && if_true == op_lhs)
2315 return if_false;
2317 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2318 return if_true;
2320 return NULL_TREE;
2323 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2324 the set of SSA names defined earlier in STMT's block. */
2326 static bool
2327 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2328 tree value)
2330 if (is_gimple_min_invariant (value))
2331 return true;
2333 if (TREE_CODE (value) == SSA_NAME)
2335 if (SSA_NAME_IS_DEFAULT_DEF (value))
2336 return true;
2338 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2339 basic_block use_bb = gimple_bb (stmt);
2340 return (def_bb == use_bb
2341 ? ssa_names->contains (value)
2342 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2345 return false;
2348 /* Helper function for predicate_statements. STMT is a potentially-trapping
2349 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2350 has value COND. Return a statement that does so. SSA_NAMES is the set of
2351 SSA names defined earlier in STMT's block. */
2353 static gimple *
2354 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2355 hash_set<tree_ssa_name_hash> *ssa_names)
2357 tree lhs = gimple_assign_lhs (stmt);
2358 tree_code code = gimple_assign_rhs_code (stmt);
2359 unsigned int nops = gimple_num_ops (stmt);
2360 internal_fn cond_fn = get_conditional_internal_fn (code);
2362 /* Construct the arguments to the conditional internal function. */
2363 auto_vec<tree, 8> args;
2364 args.safe_grow (nops + 1, true);
2365 args[0] = mask;
2366 for (unsigned int i = 1; i < nops; ++i)
2367 args[i] = gimple_op (stmt, i);
2368 args[nops] = NULL_TREE;
2370 /* Look for uses of the result to see whether they are COND_EXPRs that can
2371 be folded into the conditional call. */
2372 imm_use_iterator imm_iter;
2373 gimple *use_stmt;
2374 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2376 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2377 if (new_else && value_available_p (stmt, ssa_names, new_else))
2379 if (!args[nops])
2380 args[nops] = new_else;
2381 if (operand_equal_p (new_else, args[nops], 0))
2383 /* We have:
2385 LHS = IFN_COND (MASK, ..., ELSE);
2386 X = MASK ? LHS : ELSE;
2388 which makes X equivalent to LHS. */
2389 tree use_lhs = gimple_assign_lhs (use_stmt);
2390 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2394 if (!args[nops])
2395 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2396 nops - 1, &args[1]);
2398 /* Create and insert the call. */
2399 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2400 gimple_call_set_lhs (new_stmt, lhs);
2401 gimple_call_set_nothrow (new_stmt, true);
2403 return new_stmt;
2406 /* Predicate each write to memory in LOOP.
2408 This function transforms control flow constructs containing memory
2409 writes of the form:
2411 | for (i = 0; i < N; i++)
2412 | if (cond)
2413 | A[i] = expr;
2415 into the following form that does not contain control flow:
2417 | for (i = 0; i < N; i++)
2418 | A[i] = cond ? expr : A[i];
2420 The original CFG looks like this:
2422 | bb_0
2423 | i = 0
2424 | end_bb_0
2426 | bb_1
2427 | if (i < N) goto bb_5 else goto bb_2
2428 | end_bb_1
2430 | bb_2
2431 | cond = some_computation;
2432 | if (cond) goto bb_3 else goto bb_4
2433 | end_bb_2
2435 | bb_3
2436 | A[i] = expr;
2437 | goto bb_4
2438 | end_bb_3
2440 | bb_4
2441 | goto bb_1
2442 | end_bb_4
2444 insert_gimplified_predicates inserts the computation of the COND
2445 expression at the beginning of the destination basic block:
2447 | bb_0
2448 | i = 0
2449 | end_bb_0
2451 | bb_1
2452 | if (i < N) goto bb_5 else goto bb_2
2453 | end_bb_1
2455 | bb_2
2456 | cond = some_computation;
2457 | if (cond) goto bb_3 else goto bb_4
2458 | end_bb_2
2460 | bb_3
2461 | cond = some_computation;
2462 | A[i] = expr;
2463 | goto bb_4
2464 | end_bb_3
2466 | bb_4
2467 | goto bb_1
2468 | end_bb_4
2470 predicate_statements is then predicating the memory write as follows:
2472 | bb_0
2473 | i = 0
2474 | end_bb_0
2476 | bb_1
2477 | if (i < N) goto bb_5 else goto bb_2
2478 | end_bb_1
2480 | bb_2
2481 | if (cond) goto bb_3 else goto bb_4
2482 | end_bb_2
2484 | bb_3
2485 | cond = some_computation;
2486 | A[i] = cond ? expr : A[i];
2487 | goto bb_4
2488 | end_bb_3
2490 | bb_4
2491 | goto bb_1
2492 | end_bb_4
2494 and finally combine_blocks removes the basic block boundaries making
2495 the loop vectorizable:
2497 | bb_0
2498 | i = 0
2499 | if (i < N) goto bb_5 else goto bb_1
2500 | end_bb_0
2502 | bb_1
2503 | cond = some_computation;
2504 | A[i] = cond ? expr : A[i];
2505 | if (i < N) goto bb_5 else goto bb_4
2506 | end_bb_1
2508 | bb_4
2509 | goto bb_1
2510 | end_bb_4
2513 static void
2514 predicate_statements (loop_p loop)
2516 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2517 auto_vec<int, 1> vect_sizes;
2518 auto_vec<tree, 1> vect_masks;
2519 hash_set<tree_ssa_name_hash> ssa_names;
2521 for (i = 1; i < orig_loop_num_nodes; i++)
2523 gimple_stmt_iterator gsi;
2524 basic_block bb = ifc_bbs[i];
2525 tree cond = bb_predicate (bb);
2526 bool swap;
2527 int index;
2529 if (is_true_predicate (cond))
2530 continue;
2532 swap = false;
2533 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2535 swap = true;
2536 cond = TREE_OPERAND (cond, 0);
2539 vect_sizes.truncate (0);
2540 vect_masks.truncate (0);
2542 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2544 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2545 tree lhs;
2546 if (!stmt)
2548 else if (is_false_predicate (cond)
2549 && gimple_vdef (stmt))
2551 unlink_stmt_vdef (stmt);
2552 gsi_remove (&gsi, true);
2553 release_defs (stmt);
2554 continue;
2556 else if (gimple_plf (stmt, GF_PLF_2)
2557 && is_gimple_assign (stmt))
2559 tree lhs = gimple_assign_lhs (stmt);
2560 tree mask;
2561 gimple *new_stmt;
2562 gimple_seq stmts = NULL;
2563 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2564 /* We checked before setting GF_PLF_2 that an equivalent
2565 integer mode exists. */
2566 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2567 if (!vect_sizes.is_empty ()
2568 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2569 /* Use created mask. */
2570 mask = vect_masks[index];
2571 else
2573 if (COMPARISON_CLASS_P (cond))
2574 mask = gimple_build (&stmts, TREE_CODE (cond),
2575 boolean_type_node,
2576 TREE_OPERAND (cond, 0),
2577 TREE_OPERAND (cond, 1));
2578 else
2579 mask = cond;
2581 if (swap)
2583 tree true_val
2584 = constant_boolean_node (true, TREE_TYPE (mask));
2585 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2586 TREE_TYPE (mask), mask, true_val);
2588 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2590 /* Save mask and its size for further use. */
2591 vect_sizes.safe_push (bitsize);
2592 vect_masks.safe_push (mask);
2594 if (gimple_assign_single_p (stmt))
2595 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2596 else
2597 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2599 gsi_replace (&gsi, new_stmt, true);
2601 else if (((lhs = gimple_assign_lhs (stmt)), true)
2602 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2603 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2604 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2605 && arith_code_with_undefined_signed_overflow
2606 (gimple_assign_rhs_code (stmt)))
2608 gsi_remove (&gsi, true);
2609 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2610 bool first = true;
2611 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2612 !gsi_end_p (gsi2);)
2614 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2615 gsi_remove (&gsi2, false);
2616 if (first)
2618 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2619 first = false;
2621 else
2622 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2625 else if (gimple_vdef (stmt))
2627 tree lhs = gimple_assign_lhs (stmt);
2628 tree rhs = gimple_assign_rhs1 (stmt);
2629 tree type = TREE_TYPE (lhs);
2631 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2632 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2633 if (swap)
2634 std::swap (lhs, rhs);
2635 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2636 NULL_TREE, true, GSI_SAME_STMT);
2637 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2638 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2639 update_stmt (stmt);
2642 if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
2643 && is_gimple_call (gsi_stmt (gsi)))
2645 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2646 This will cause the vectorizer to match the "in branch"
2647 clone variants, and serves to build the mask vector
2648 in a natural way. */
2649 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2650 tree orig_fn = gimple_call_fn (call);
2651 int orig_nargs = gimple_call_num_args (call);
2652 auto_vec<tree> args;
2653 args.safe_push (orig_fn);
2654 for (int i = 0; i < orig_nargs; i++)
2655 args.safe_push (gimple_call_arg (call, i));
2656 args.safe_push (cond);
2658 /* Replace the call with a IFN_MASK_CALL that has the extra
2659 condition parameter. */
2660 gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
2661 args);
2662 gimple_call_set_lhs (new_call, gimple_call_lhs (call));
2663 gsi_replace (&gsi, new_call, true);
2666 lhs = gimple_get_lhs (gsi_stmt (gsi));
2667 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2668 ssa_names.add (lhs);
2669 gsi_next (&gsi);
2671 ssa_names.empty ();
2675 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2676 other than the exit and latch of the LOOP. Also resets the
2677 GIMPLE_DEBUG information. */
2679 static void
2680 remove_conditions_and_labels (loop_p loop)
2682 gimple_stmt_iterator gsi;
2683 unsigned int i;
2685 for (i = 0; i < loop->num_nodes; i++)
2687 basic_block bb = ifc_bbs[i];
2689 if (bb_with_exit_edge_p (loop, bb)
2690 || bb == loop->latch)
2691 continue;
2693 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2694 switch (gimple_code (gsi_stmt (gsi)))
2696 case GIMPLE_COND:
2697 case GIMPLE_LABEL:
2698 gsi_remove (&gsi, true);
2699 break;
2701 case GIMPLE_DEBUG:
2702 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2703 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2705 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2706 update_stmt (gsi_stmt (gsi));
2708 gsi_next (&gsi);
2709 break;
2711 default:
2712 gsi_next (&gsi);
2717 /* Combine all the basic blocks from LOOP into one or two super basic
2718 blocks. Replace PHI nodes with conditional modify expressions. */
2720 static void
2721 combine_blocks (class loop *loop)
2723 basic_block bb, exit_bb, merge_target_bb;
2724 unsigned int orig_loop_num_nodes = loop->num_nodes;
2725 unsigned int i;
2726 edge e;
2727 edge_iterator ei;
2729 remove_conditions_and_labels (loop);
2730 insert_gimplified_predicates (loop);
2731 predicate_all_scalar_phis (loop);
2733 if (need_to_predicate || need_to_rewrite_undefined)
2734 predicate_statements (loop);
2736 /* Merge basic blocks. */
2737 exit_bb = NULL;
2738 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2739 for (i = 0; i < orig_loop_num_nodes; i++)
2741 bb = ifc_bbs[i];
2742 predicated[i] = !is_true_predicate (bb_predicate (bb));
2743 free_bb_predicate (bb);
2744 if (bb_with_exit_edge_p (loop, bb))
2746 gcc_assert (exit_bb == NULL);
2747 exit_bb = bb;
2750 gcc_assert (exit_bb != loop->latch);
2752 merge_target_bb = loop->header;
2754 /* Get at the virtual def valid for uses starting at the first block
2755 we merge into the header. Without a virtual PHI the loop has the
2756 same virtual use on all stmts. */
2757 gphi *vphi = get_virtual_phi (loop->header);
2758 tree last_vdef = NULL_TREE;
2759 if (vphi)
2761 last_vdef = gimple_phi_result (vphi);
2762 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2763 ! gsi_end_p (gsi); gsi_next (&gsi))
2764 if (gimple_vdef (gsi_stmt (gsi)))
2765 last_vdef = gimple_vdef (gsi_stmt (gsi));
2767 for (i = 1; i < orig_loop_num_nodes; i++)
2769 gimple_stmt_iterator gsi;
2770 gimple_stmt_iterator last;
2772 bb = ifc_bbs[i];
2774 if (bb == exit_bb || bb == loop->latch)
2775 continue;
2777 /* We release virtual PHIs late because we have to propagate them
2778 out using the current VUSE. The def might be the one used
2779 after the loop. */
2780 vphi = get_virtual_phi (bb);
2781 if (vphi)
2783 /* When there's just loads inside the loop a stray virtual
2784 PHI merging the uses can appear, update last_vdef from
2785 it. */
2786 if (!last_vdef)
2787 last_vdef = gimple_phi_arg_def (vphi, 0);
2788 imm_use_iterator iter;
2789 use_operand_p use_p;
2790 gimple *use_stmt;
2791 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2793 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2794 SET_USE (use_p, last_vdef);
2796 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2797 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2798 gsi = gsi_for_stmt (vphi);
2799 remove_phi_node (&gsi, true);
2802 /* Make stmts member of loop->header and clear range info from all stmts
2803 in BB which is now no longer executed conditional on a predicate we
2804 could have derived it from. */
2805 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2807 gimple *stmt = gsi_stmt (gsi);
2808 gimple_set_bb (stmt, merge_target_bb);
2809 /* Update virtual operands. */
2810 if (last_vdef)
2812 use_operand_p use_p = ssa_vuse_operand (stmt);
2813 if (use_p
2814 && USE_FROM_PTR (use_p) != last_vdef)
2815 SET_USE (use_p, last_vdef);
2816 if (gimple_vdef (stmt))
2817 last_vdef = gimple_vdef (stmt);
2819 else
2820 /* If this is the first load we arrive at update last_vdef
2821 so we handle stray PHIs correctly. */
2822 last_vdef = gimple_vuse (stmt);
2823 if (predicated[i])
2825 ssa_op_iter i;
2826 tree op;
2827 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2828 reset_flow_sensitive_info (op);
2832 /* Update stmt list. */
2833 last = gsi_last_bb (merge_target_bb);
2834 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2835 set_bb_seq (bb, NULL);
2838 /* Fixup virtual operands in the exit block. */
2839 if (exit_bb
2840 && exit_bb != loop->header)
2842 /* We release virtual PHIs late because we have to propagate them
2843 out using the current VUSE. The def might be the one used
2844 after the loop. */
2845 vphi = get_virtual_phi (exit_bb);
2846 if (vphi)
2848 /* When there's just loads inside the loop a stray virtual
2849 PHI merging the uses can appear, update last_vdef from
2850 it. */
2851 if (!last_vdef)
2852 last_vdef = gimple_phi_arg_def (vphi, 0);
2853 imm_use_iterator iter;
2854 use_operand_p use_p;
2855 gimple *use_stmt;
2856 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2858 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2859 SET_USE (use_p, last_vdef);
2861 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2862 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2863 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2864 remove_phi_node (&gsi, true);
2868 /* Now remove all the edges in the loop, except for those from the exit
2869 block and delete the blocks we elided. */
2870 for (i = 1; i < orig_loop_num_nodes; i++)
2872 bb = ifc_bbs[i];
2874 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2876 if (e->src == exit_bb)
2877 ei_next (&ei);
2878 else
2879 remove_edge (e);
2882 for (i = 1; i < orig_loop_num_nodes; i++)
2884 bb = ifc_bbs[i];
2886 if (bb == exit_bb || bb == loop->latch)
2887 continue;
2889 delete_basic_block (bb);
2892 /* Re-connect the exit block. */
2893 if (exit_bb != NULL)
2895 if (exit_bb != loop->header)
2897 /* Connect this node to loop header. */
2898 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2899 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2902 /* Redirect non-exit edges to loop->latch. */
2903 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2905 if (!loop_exit_edge_p (loop, e))
2906 redirect_edge_and_branch (e, loop->latch);
2908 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2910 else
2912 /* If the loop does not have an exit, reconnect header and latch. */
2913 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2914 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2917 /* If possible, merge loop header to the block with the exit edge.
2918 This reduces the number of basic blocks to two, to please the
2919 vectorizer that handles only loops with two nodes. */
2920 if (exit_bb
2921 && exit_bb != loop->header)
2923 if (can_merge_blocks_p (loop->header, exit_bb))
2924 merge_blocks (loop->header, exit_bb);
2927 free (ifc_bbs);
2928 ifc_bbs = NULL;
2929 free (predicated);
2932 /* Version LOOP before if-converting it; the original loop
2933 will be if-converted, the new copy of the loop will not,
2934 and the LOOP_VECTORIZED internal call will be guarding which
2935 loop to execute. The vectorizer pass will fold this
2936 internal call into either true or false.
2938 Note that this function intentionally invalidates profile. Both edges
2939 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2940 consistent after the condition is folded in the vectorizer. */
2942 static class loop *
2943 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2945 basic_block cond_bb;
2946 tree cond = make_ssa_name (boolean_type_node);
2947 class loop *new_loop;
2948 gimple *g;
2949 gimple_stmt_iterator gsi;
2950 unsigned int save_length = 0;
2952 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2953 build_int_cst (integer_type_node, loop->num),
2954 integer_zero_node);
2955 gimple_call_set_lhs (g, cond);
2957 void **saved_preds = NULL;
2958 if (any_complicated_phi || need_to_predicate)
2960 /* Save BB->aux around loop_version as that uses the same field. */
2961 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2962 saved_preds = XALLOCAVEC (void *, save_length);
2963 for (unsigned i = 0; i < save_length; i++)
2964 saved_preds[i] = ifc_bbs[i]->aux;
2967 initialize_original_copy_tables ();
2968 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2969 is re-merged in the vectorizer. */
2970 new_loop = loop_version (loop, cond, &cond_bb,
2971 profile_probability::always (),
2972 profile_probability::always (),
2973 profile_probability::always (),
2974 profile_probability::always (), true);
2975 free_original_copy_tables ();
2977 if (any_complicated_phi || need_to_predicate)
2978 for (unsigned i = 0; i < save_length; i++)
2979 ifc_bbs[i]->aux = saved_preds[i];
2981 if (new_loop == NULL)
2982 return NULL;
2984 new_loop->dont_vectorize = true;
2985 new_loop->force_vectorize = false;
2986 gsi = gsi_last_bb (cond_bb);
2987 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2988 if (preds)
2989 preds->safe_push (g);
2990 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2991 update_ssa (TODO_update_ssa_no_phi);
2992 return new_loop;
2995 /* Return true when LOOP satisfies the follow conditions that will
2996 allow it to be recognized by the vectorizer for outer-loop
2997 vectorization:
2998 - The loop is not the root node of the loop tree.
2999 - The loop has exactly one inner loop.
3000 - The loop has a single exit.
3001 - The loop header has a single successor, which is the inner
3002 loop header.
3003 - Each of the inner and outer loop latches have a single
3004 predecessor.
3005 - The loop exit block has a single predecessor, which is the
3006 inner loop's exit block. */
3008 static bool
3009 versionable_outer_loop_p (class loop *loop)
3011 if (!loop_outer (loop)
3012 || loop->dont_vectorize
3013 || !loop->inner
3014 || loop->inner->next
3015 || !single_exit (loop)
3016 || !single_succ_p (loop->header)
3017 || single_succ (loop->header) != loop->inner->header
3018 || !single_pred_p (loop->latch)
3019 || !single_pred_p (loop->inner->latch))
3020 return false;
3022 basic_block outer_exit = single_pred (loop->latch);
3023 basic_block inner_exit = single_pred (loop->inner->latch);
3025 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3026 return false;
3028 if (dump_file)
3029 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3031 return true;
3034 /* Performs splitting of critical edges. Skip splitting and return false
3035 if LOOP will not be converted because:
3037 - LOOP is not well formed.
3038 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3040 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3042 static bool
3043 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3045 basic_block *body;
3046 basic_block bb;
3047 unsigned int num = loop->num_nodes;
3048 unsigned int i;
3049 edge e;
3050 edge_iterator ei;
3051 auto_vec<edge> critical_edges;
3053 /* Loop is not well formed. */
3054 if (loop->inner)
3055 return false;
3057 body = get_loop_body (loop);
3058 for (i = 0; i < num; i++)
3060 bb = body[i];
3061 if (!aggressive_if_conv
3062 && phi_nodes (bb)
3063 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3065 if (dump_file && (dump_flags & TDF_DETAILS))
3066 fprintf (dump_file,
3067 "BB %d has complicated PHI with more than %u args.\n",
3068 bb->index, MAX_PHI_ARG_NUM);
3070 free (body);
3071 return false;
3073 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3074 continue;
3076 /* Skip basic blocks not ending with conditional branch. */
3077 if (!safe_is_a <gcond *> (*gsi_last_bb (bb)))
3078 continue;
3080 FOR_EACH_EDGE (e, ei, bb->succs)
3081 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3082 critical_edges.safe_push (e);
3084 free (body);
3086 while (critical_edges.length () > 0)
3088 e = critical_edges.pop ();
3089 /* Don't split if bb can be predicated along non-critical edge. */
3090 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3091 split_edge (e);
3094 return true;
3097 /* Delete redundant statements produced by predication which prevents
3098 loop vectorization. */
3100 static void
3101 ifcvt_local_dce (class loop *loop)
3103 gimple *stmt;
3104 gimple *stmt1;
3105 gimple *phi;
3106 gimple_stmt_iterator gsi;
3107 auto_vec<gimple *> worklist;
3108 enum gimple_code code;
3109 use_operand_p use_p;
3110 imm_use_iterator imm_iter;
3112 /* The loop has a single BB only. */
3113 basic_block bb = loop->header;
3114 tree latch_vdef = NULL_TREE;
3116 worklist.create (64);
3117 /* Consider all phi as live statements. */
3118 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3120 phi = gsi_stmt (gsi);
3121 gimple_set_plf (phi, GF_PLF_2, true);
3122 worklist.safe_push (phi);
3123 if (virtual_operand_p (gimple_phi_result (phi)))
3124 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3126 /* Consider load/store statements, CALL and COND as live. */
3127 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3129 stmt = gsi_stmt (gsi);
3130 if (is_gimple_debug (stmt))
3132 gimple_set_plf (stmt, GF_PLF_2, true);
3133 continue;
3135 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3137 gimple_set_plf (stmt, GF_PLF_2, true);
3138 worklist.safe_push (stmt);
3139 continue;
3141 code = gimple_code (stmt);
3142 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3144 gimple_set_plf (stmt, GF_PLF_2, true);
3145 worklist.safe_push (stmt);
3146 continue;
3148 gimple_set_plf (stmt, GF_PLF_2, false);
3150 if (code == GIMPLE_ASSIGN)
3152 tree lhs = gimple_assign_lhs (stmt);
3153 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3155 stmt1 = USE_STMT (use_p);
3156 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3158 gimple_set_plf (stmt, GF_PLF_2, true);
3159 worklist.safe_push (stmt);
3160 break;
3165 /* Propagate liveness through arguments of live stmt. */
3166 while (worklist.length () > 0)
3168 ssa_op_iter iter;
3169 use_operand_p use_p;
3170 tree use;
3172 stmt = worklist.pop ();
3173 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3175 use = USE_FROM_PTR (use_p);
3176 if (TREE_CODE (use) != SSA_NAME)
3177 continue;
3178 stmt1 = SSA_NAME_DEF_STMT (use);
3179 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3180 continue;
3181 gimple_set_plf (stmt1, GF_PLF_2, true);
3182 worklist.safe_push (stmt1);
3185 /* Delete dead statements. */
3186 gsi = gsi_last_bb (bb);
3187 while (!gsi_end_p (gsi))
3189 gimple_stmt_iterator gsiprev = gsi;
3190 gsi_prev (&gsiprev);
3191 stmt = gsi_stmt (gsi);
3192 if (gimple_store_p (stmt) && gimple_vdef (stmt))
3194 tree lhs = gimple_get_lhs (stmt);
3195 ao_ref write;
3196 ao_ref_init (&write, lhs);
3198 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3199 == DSE_STORE_DEAD)
3200 delete_dead_or_redundant_assignment (&gsi, "dead");
3201 gsi = gsiprev;
3202 continue;
3205 if (gimple_plf (stmt, GF_PLF_2))
3207 gsi = gsiprev;
3208 continue;
3210 if (dump_file && (dump_flags & TDF_DETAILS))
3212 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3213 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3215 gsi_remove (&gsi, true);
3216 release_defs (stmt);
3217 gsi = gsiprev;
3221 /* Return true if VALUE is already available on edge PE. */
3223 static bool
3224 ifcvt_available_on_edge_p (edge pe, tree value)
3226 if (is_gimple_min_invariant (value))
3227 return true;
3229 if (TREE_CODE (value) == SSA_NAME)
3231 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3232 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3233 return true;
3236 return false;
3239 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3240 edge PE. */
3242 static bool
3243 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3245 if (auto *call = dyn_cast<gcall *> (stmt))
3247 if (gimple_call_internal_p (call)
3248 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3249 return false;
3251 else if (auto *assign = dyn_cast<gassign *> (stmt))
3253 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3254 return false;
3256 else
3257 return false;
3259 if (gimple_has_side_effects (stmt)
3260 || gimple_could_trap_p (stmt)
3261 || stmt_could_throw_p (cfun, stmt)
3262 || gimple_vdef (stmt)
3263 || gimple_vuse (stmt))
3264 return false;
3266 int num_args = gimple_num_args (stmt);
3267 if (pe != loop_preheader_edge (loop))
3269 for (int i = 0; i < num_args; ++i)
3270 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3271 return false;
3273 else
3275 for (int i = 0; i < num_args; ++i)
3276 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3277 return false;
3280 return true;
3283 /* Hoist invariant statements from LOOP to edge PE. */
3285 static void
3286 ifcvt_hoist_invariants (class loop *loop, edge pe)
3288 gimple_stmt_iterator hoist_gsi = {};
3289 unsigned int num_blocks = loop->num_nodes;
3290 basic_block *body = get_loop_body (loop);
3291 for (unsigned int i = 0; i < num_blocks; ++i)
3292 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3294 gimple *stmt = gsi_stmt (gsi);
3295 if (ifcvt_can_hoist (loop, pe, stmt))
3297 /* Once we've hoisted one statement, insert other statements
3298 after it. */
3299 gsi_remove (&gsi, false);
3300 if (hoist_gsi.ptr)
3301 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3302 else
3304 gsi_insert_on_edge_immediate (pe, stmt);
3305 hoist_gsi = gsi_for_stmt (stmt);
3307 continue;
3309 gsi_next (&gsi);
3311 free (body);
3314 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3315 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3316 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3317 if not NULL, will hold the tree representing the base struct of this
3318 bitfield. */
3320 static tree
3321 get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3322 tree *struct_expr)
3324 tree comp_ref = write ? gimple_assign_lhs (stmt)
3325 : gimple_assign_rhs1 (stmt);
3327 tree field_decl = TREE_OPERAND (comp_ref, 1);
3328 tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3330 /* Bail out if the representative is not a suitable type for a scalar
3331 register variable. */
3332 if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
3333 return NULL_TREE;
3335 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3336 precision. */
3337 unsigned HOST_WIDE_INT bf_prec
3338 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3339 if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3340 return NULL_TREE;
3342 if (struct_expr)
3343 *struct_expr = TREE_OPERAND (comp_ref, 0);
3345 if (bitpos)
3347 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3348 where our bitfield starts in relation to the container REP_DECL. The
3349 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3350 us how many bytes from the start of the structure there are until the
3351 start of the group of bitfield members the FIELD_DECL belongs to,
3352 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3353 position our actual bitfield member starts. For the container
3354 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3355 us the distance between the start of the structure and the start of
3356 the container, though the first is in bytes and the later other in
3357 bits. With this in mind we calculate the bit position of our new
3358 BITFIELD_REF by subtracting the number of bits between the start of
3359 the structure and the container from the number of bits from the start
3360 of the structure and the actual bitfield member. */
3361 tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3362 DECL_FIELD_OFFSET (field_decl),
3363 build_int_cst (bitsizetype, BITS_PER_UNIT));
3364 bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3365 DECL_FIELD_BIT_OFFSET (field_decl));
3366 tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3367 DECL_FIELD_OFFSET (rep_decl),
3368 build_int_cst (bitsizetype, BITS_PER_UNIT));
3369 rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3370 DECL_FIELD_BIT_OFFSET (rep_decl));
3372 *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3375 return rep_decl;
3379 /* Lowers the bitfield described by DATA.
3380 For a write like:
3382 struct.bf = _1;
3384 lower to:
3386 __ifc_1 = struct.<representative>;
3387 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3388 struct.<representative> = __ifc_2;
3390 For a read:
3392 _1 = struct.bf;
3394 lower to:
3396 __ifc_1 = struct.<representative>;
3397 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3399 where representative is a legal load that contains the bitfield value,
3400 bitsize is the size of the bitfield and bitpos the offset to the start of
3401 the bitfield within the representative. */
3403 static void
3404 lower_bitfield (gassign *stmt, bool write)
3406 tree struct_expr;
3407 tree bitpos;
3408 tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
3409 tree rep_type = TREE_TYPE (rep_decl);
3410 tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
3412 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3413 if (dump_file && (dump_flags & TDF_DETAILS))
3415 fprintf (dump_file, "Lowering:\n");
3416 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3417 fprintf (dump_file, "to:\n");
3420 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3421 defining SSA_NAME. */
3422 tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
3423 NULL_TREE);
3424 tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
3426 if (dump_file && (dump_flags & TDF_DETAILS))
3427 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3429 if (write)
3431 new_val = ifc_temp_var (rep_type,
3432 build3 (BIT_INSERT_EXPR, rep_type, new_val,
3433 unshare_expr (gimple_assign_rhs1 (stmt)),
3434 bitpos), &gsi);
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3437 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3439 gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
3440 new_val);
3441 gimple_move_vops (new_stmt, stmt);
3442 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3445 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3447 else
3449 tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
3450 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
3451 bitpos);
3452 new_val = ifc_temp_var (bf_type, bfr, &gsi);
3454 gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
3455 new_val);
3456 gimple_move_vops (new_stmt, stmt);
3457 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3459 if (dump_file && (dump_flags & TDF_DETAILS))
3460 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3463 gsi_remove (&gsi, true);
3466 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3467 with data structures representing these bitfields. */
3469 static bool
3470 bitfields_to_lower_p (class loop *loop,
3471 vec <gassign *> &reads_to_lower,
3472 vec <gassign *> &writes_to_lower)
3474 gimple_stmt_iterator gsi;
3476 if (dump_file && (dump_flags & TDF_DETAILS))
3478 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3481 for (unsigned i = 0; i < loop->num_nodes; ++i)
3483 basic_block bb = ifc_bbs[i];
3484 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3486 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3487 if (!stmt)
3488 continue;
3490 tree op = gimple_assign_lhs (stmt);
3491 bool write = TREE_CODE (op) == COMPONENT_REF;
3493 if (!write)
3494 op = gimple_assign_rhs1 (stmt);
3496 if (TREE_CODE (op) != COMPONENT_REF)
3497 continue;
3499 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3501 if (dump_file && (dump_flags & TDF_DETAILS))
3502 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3504 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3506 if (dump_file && (dump_flags & TDF_DETAILS))
3507 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3508 " field type is not Integral.\n");
3509 return false;
3512 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3514 if (dump_file && (dump_flags & TDF_DETAILS))
3515 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3516 " representative is BLKmode.\n");
3517 return false;
3520 if (dump_file && (dump_flags & TDF_DETAILS))
3521 fprintf (dump_file, "\tBitfield OK to lower.\n");
3522 if (write)
3523 writes_to_lower.safe_push (stmt);
3524 else
3525 reads_to_lower.safe_push (stmt);
3529 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
3533 /* If-convert LOOP when it is legal. For the moment this pass has no
3534 profitability analysis. Returns non-zero todo flags when something
3535 changed. */
3537 unsigned int
3538 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3540 unsigned int todo = 0;
3541 bool aggressive_if_conv;
3542 class loop *rloop;
3543 auto_vec <gassign *, 4> reads_to_lower;
3544 auto_vec <gassign *, 4> writes_to_lower;
3545 bitmap exit_bbs;
3546 edge pe;
3547 auto_vec<data_reference_p, 10> refs;
3549 again:
3550 rloop = NULL;
3551 ifc_bbs = NULL;
3552 need_to_lower_bitfields = false;
3553 need_to_ifcvt = false;
3554 need_to_predicate = false;
3555 need_to_rewrite_undefined = false;
3556 any_complicated_phi = false;
3558 /* Apply more aggressive if-conversion when loop or its outer loop were
3559 marked with simd pragma. When that's the case, we try to if-convert
3560 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3561 aggressive_if_conv = loop->force_vectorize;
3562 if (!aggressive_if_conv)
3564 class loop *outer_loop = loop_outer (loop);
3565 if (outer_loop && outer_loop->force_vectorize)
3566 aggressive_if_conv = true;
3569 if (!single_exit (loop))
3570 goto cleanup;
3572 /* If there are more than two BBs in the loop then there is at least one if
3573 to convert. */
3574 if (loop->num_nodes > 2
3575 && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
3576 goto cleanup;
3578 ifc_bbs = get_loop_body_in_if_conv_order (loop);
3579 if (!ifc_bbs)
3581 if (dump_file && (dump_flags & TDF_DETAILS))
3582 fprintf (dump_file, "Irreducible loop\n");
3583 goto cleanup;
3586 if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
3587 goto cleanup;
3589 if (loop->num_nodes > 2)
3591 need_to_ifcvt = true;
3593 if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
3594 goto cleanup;
3596 if ((need_to_predicate || any_complicated_phi)
3597 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3598 || loop->dont_vectorize))
3599 goto cleanup;
3602 if ((flag_tree_loop_vectorize || loop->force_vectorize)
3603 && !loop->dont_vectorize)
3604 need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
3605 writes_to_lower);
3607 if (!need_to_ifcvt && !need_to_lower_bitfields)
3608 goto cleanup;
3610 /* The edge to insert invariant stmts on. */
3611 pe = loop_preheader_edge (loop);
3613 /* Since we have no cost model, always version loops unless the user
3614 specified -ftree-loop-if-convert or unless versioning is required.
3615 Either version this loop, or if the pattern is right for outer-loop
3616 vectorization, version the outer loop. In the latter case we will
3617 still if-convert the original inner loop. */
3618 if (need_to_lower_bitfields
3619 || need_to_predicate
3620 || any_complicated_phi
3621 || flag_tree_loop_if_convert != 1)
3623 class loop *vloop
3624 = (versionable_outer_loop_p (loop_outer (loop))
3625 ? loop_outer (loop) : loop);
3626 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3627 if (nloop == NULL)
3628 goto cleanup;
3629 if (vloop != loop)
3631 /* If versionable_outer_loop_p decided to version the
3632 outer loop, version also the inner loop of the non-vectorized
3633 loop copy. So we transform:
3634 loop1
3635 loop2
3636 into:
3637 if (LOOP_VECTORIZED (1, 3))
3639 loop1
3640 loop2
3642 else
3643 loop3 (copy of loop1)
3644 if (LOOP_VECTORIZED (4, 5))
3645 loop4 (copy of loop2)
3646 else
3647 loop5 (copy of loop4) */
3648 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3649 rloop = nloop->inner;
3651 else
3652 /* If we versioned loop then make sure to insert invariant
3653 stmts before the .LOOP_VECTORIZED check since the vectorizer
3654 will re-use that for things like runtime alias versioning
3655 whose condition can end up using those invariants. */
3656 pe = single_pred_edge (gimple_bb (preds->last ()));
3659 if (need_to_lower_bitfields)
3661 if (dump_file && (dump_flags & TDF_DETAILS))
3663 fprintf (dump_file, "-------------------------\n");
3664 fprintf (dump_file, "Start lowering bitfields\n");
3666 while (!reads_to_lower.is_empty ())
3667 lower_bitfield (reads_to_lower.pop (), false);
3668 while (!writes_to_lower.is_empty ())
3669 lower_bitfield (writes_to_lower.pop (), true);
3671 if (dump_file && (dump_flags & TDF_DETAILS))
3673 fprintf (dump_file, "Done lowering bitfields\n");
3674 fprintf (dump_file, "-------------------------\n");
3677 if (need_to_ifcvt)
3679 /* Now all statements are if-convertible. Combine all the basic
3680 blocks into one huge basic block doing the if-conversion
3681 on-the-fly. */
3682 combine_blocks (loop);
3685 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3686 and stores are involved. CSE only the loop body, not the entry
3687 PHIs, those are to be kept in sync with the non-if-converted copy.
3688 ??? We'll still keep dead stores though. */
3689 exit_bbs = BITMAP_ALLOC (NULL);
3690 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3691 bitmap_set_bit (exit_bbs, loop->latch->index);
3693 std::pair <tree, tree> *name_pair;
3694 unsigned ssa_names_idx;
3695 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3696 replace_uses_by (name_pair->first, name_pair->second);
3697 redundant_ssa_names.release ();
3699 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3701 /* Delete dead predicate computations. */
3702 ifcvt_local_dce (loop);
3703 BITMAP_FREE (exit_bbs);
3705 ifcvt_hoist_invariants (loop, pe);
3707 todo |= TODO_cleanup_cfg;
3709 cleanup:
3710 data_reference_p dr;
3711 unsigned int i;
3712 for (i = 0; refs.iterate (i, &dr); i++)
3714 free (dr->aux);
3715 free_data_ref (dr);
3717 refs.truncate (0);
3719 if (ifc_bbs)
3721 unsigned int i;
3723 for (i = 0; i < loop->num_nodes; i++)
3724 free_bb_predicate (ifc_bbs[i]);
3726 free (ifc_bbs);
3727 ifc_bbs = NULL;
3729 if (rloop != NULL)
3731 loop = rloop;
3732 reads_to_lower.truncate (0);
3733 writes_to_lower.truncate (0);
3734 goto again;
3737 return todo;
3740 /* Tree if-conversion pass management. */
3742 namespace {
3744 const pass_data pass_data_if_conversion =
3746 GIMPLE_PASS, /* type */
3747 "ifcvt", /* name */
3748 OPTGROUP_NONE, /* optinfo_flags */
3749 TV_TREE_LOOP_IFCVT, /* tv_id */
3750 ( PROP_cfg | PROP_ssa ), /* properties_required */
3751 0, /* properties_provided */
3752 0, /* properties_destroyed */
3753 0, /* todo_flags_start */
3754 0, /* todo_flags_finish */
3757 class pass_if_conversion : public gimple_opt_pass
3759 public:
3760 pass_if_conversion (gcc::context *ctxt)
3761 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3764 /* opt_pass methods: */
3765 bool gate (function *) final override;
3766 unsigned int execute (function *) final override;
3768 }; // class pass_if_conversion
3770 bool
3771 pass_if_conversion::gate (function *fun)
3773 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3774 && flag_tree_loop_if_convert != 0)
3775 || flag_tree_loop_if_convert == 1);
3778 unsigned int
3779 pass_if_conversion::execute (function *fun)
3781 unsigned todo = 0;
3783 if (number_of_loops (fun) <= 1)
3784 return 0;
3786 auto_vec<gimple *> preds;
3787 for (auto loop : loops_list (cfun, 0))
3788 if (flag_tree_loop_if_convert == 1
3789 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3790 && !loop->dont_vectorize))
3791 todo |= tree_if_conversion (loop, &preds);
3793 if (todo)
3795 free_numbers_of_iterations_estimates (fun);
3796 scev_reset ();
3799 if (flag_checking)
3801 basic_block bb;
3802 FOR_EACH_BB_FN (bb, fun)
3803 gcc_assert (!bb->aux);
3806 /* Perform IL update now, it might elide some loops. */
3807 if (todo & TODO_cleanup_cfg)
3809 cleanup_tree_cfg ();
3810 if (need_ssa_update_p (fun))
3811 todo |= TODO_update_ssa;
3813 if (todo & TODO_update_ssa_any)
3814 update_ssa (todo & TODO_update_ssa_any);
3816 /* If if-conversion elided the loop fall back to the original one. */
3817 for (unsigned i = 0; i < preds.length (); ++i)
3819 gimple *g = preds[i];
3820 if (!gimple_bb (g))
3821 continue;
3822 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3823 unsigned orig_loop = tree_to_uhwi (gimple_call_arg (g, 1));
3824 if (!get_loop (fun, ifcvt_loop) || !get_loop (fun, orig_loop))
3826 if (dump_file)
3827 fprintf (dump_file, "If-converted loop vanished\n");
3828 fold_loop_internal_call (g, boolean_false_node);
3832 return 0;
3835 } // anon namespace
3837 gimple_opt_pass *
3838 make_pass_if_conversion (gcc::context *ctxt)
3840 return new pass_if_conversion (ctxt);