d: Add test for PR d/108167 to the testsuite [PR108167]
[official-gcc.git] / gcc / tree-if-conv.cc
blobf133102ad3350a0fd3a09ad836c68e840f316a0e
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "expr.h"
95 #include "optabs-query.h"
96 #include "gimple-pretty-print.h"
97 #include "alias.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
107 #include "cfgloop.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
114 #include "dbgcnt.h"
115 #include "tree-hash-traits.h"
116 #include "varasm.h"
117 #include "builtins.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
125 #include "tree-eh.h"
126 #include "cgraph.h"
128 /* For lang_hooks.types.type_for_mode. */
129 #include "langhooks.h"
131 /* Only handle PHIs with no more arguments unless we are asked to by
132 simd pragma. */
133 #define MAX_PHI_ARG_NUM \
134 ((unsigned) param_max_tree_if_conversion_phi_args)
136 /* True if we've converted a statement that was only executed when some
137 condition C was true, and if for correctness we need to predicate the
138 statement to ensure that it is a no-op when C is false. See
139 predicate_statements for the kinds of predication we support. */
140 static bool need_to_predicate;
142 /* True if we have to rewrite stmts that may invoke undefined behavior
143 when a condition C was false so it doesn't if it is always executed.
144 See predicate_statements for the kinds of predication we support. */
145 static bool need_to_rewrite_undefined;
147 /* Indicate if there are any complicated PHIs that need to be handled in
148 if-conversion. Complicated PHI has more than two arguments and can't
149 be degenerated to two arguments PHI. See more information in comment
150 before phi_convertible_by_degenerating_args. */
151 static bool any_complicated_phi;
153 /* True if we have bitfield accesses we can lower. */
154 static bool need_to_lower_bitfields;
156 /* True if there is any ifcvting to be done. */
157 static bool need_to_ifcvt;
159 /* Hash for struct innermost_loop_behavior. It depends on the user to
160 free the memory. */
162 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
164 static inline hashval_t hash (const value_type &);
165 static inline bool equal (const value_type &,
166 const compare_type &);
169 inline hashval_t
170 innermost_loop_behavior_hash::hash (const value_type &e)
172 hashval_t hash;
174 hash = iterative_hash_expr (e->base_address, 0);
175 hash = iterative_hash_expr (e->offset, hash);
176 hash = iterative_hash_expr (e->init, hash);
177 return iterative_hash_expr (e->step, hash);
180 inline bool
181 innermost_loop_behavior_hash::equal (const value_type &e1,
182 const compare_type &e2)
184 if ((e1->base_address && !e2->base_address)
185 || (!e1->base_address && e2->base_address)
186 || (!e1->offset && e2->offset)
187 || (e1->offset && !e2->offset)
188 || (!e1->init && e2->init)
189 || (e1->init && !e2->init)
190 || (!e1->step && e2->step)
191 || (e1->step && !e2->step))
192 return false;
194 if (e1->base_address && e2->base_address
195 && !operand_equal_p (e1->base_address, e2->base_address, 0))
196 return false;
197 if (e1->offset && e2->offset
198 && !operand_equal_p (e1->offset, e2->offset, 0))
199 return false;
200 if (e1->init && e2->init
201 && !operand_equal_p (e1->init, e2->init, 0))
202 return false;
203 if (e1->step && e2->step
204 && !operand_equal_p (e1->step, e2->step, 0))
205 return false;
207 return true;
210 /* List of basic blocks in if-conversion-suitable order. */
211 static basic_block *ifc_bbs;
213 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
214 static hash_map<innermost_loop_behavior_hash,
215 data_reference_p> *innermost_DR_map;
217 /* Hash table to store <base reference, DR> pairs. */
218 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
220 /* List of redundant SSA names: the first should be replaced by the second. */
221 static vec< std::pair<tree, tree> > redundant_ssa_names;
223 /* Structure used to predicate basic blocks. This is attached to the
224 ->aux field of the BBs in the loop to be if-converted. */
225 struct bb_predicate {
227 /* The condition under which this basic block is executed. */
228 tree predicate;
230 /* PREDICATE is gimplified, and the sequence of statements is
231 recorded here, in order to avoid the duplication of computations
232 that occur in previous conditions. See PR44483. */
233 gimple_seq predicate_gimplified_stmts;
236 /* Returns true when the basic block BB has a predicate. */
238 static inline bool
239 bb_has_predicate (basic_block bb)
241 return bb->aux != NULL;
244 /* Returns the gimplified predicate for basic block BB. */
246 static inline tree
247 bb_predicate (basic_block bb)
249 return ((struct bb_predicate *) bb->aux)->predicate;
252 /* Sets the gimplified predicate COND for basic block BB. */
254 static inline void
255 set_bb_predicate (basic_block bb, tree cond)
257 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
258 && is_gimple_val (TREE_OPERAND (cond, 0)))
259 || is_gimple_val (cond));
260 ((struct bb_predicate *) bb->aux)->predicate = cond;
263 /* Returns the sequence of statements of the gimplification of the
264 predicate for basic block BB. */
266 static inline gimple_seq
267 bb_predicate_gimplified_stmts (basic_block bb)
269 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
272 /* Sets the sequence of statements STMTS of the gimplification of the
273 predicate for basic block BB. */
275 static inline void
276 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
278 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
281 /* Adds the sequence of statements STMTS to the sequence of statements
282 of the predicate for basic block BB. */
284 static inline void
285 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
287 /* We might have updated some stmts in STMTS via force_gimple_operand
288 calling fold_stmt and that producing multiple stmts. Delink immediate
289 uses so update_ssa after loop versioning doesn't get confused for
290 the not yet inserted predicates.
291 ??? This should go away once we reliably avoid updating stmts
292 not in any BB. */
293 for (gimple_stmt_iterator gsi = gsi_start (stmts);
294 !gsi_end_p (gsi); gsi_next (&gsi))
296 gimple *stmt = gsi_stmt (gsi);
297 delink_stmt_imm_use (stmt);
298 gimple_set_modified (stmt, true);
300 gimple_seq_add_seq_without_update
301 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
304 /* Initializes to TRUE the predicate of basic block BB. */
306 static inline void
307 init_bb_predicate (basic_block bb)
309 bb->aux = XNEW (struct bb_predicate);
310 set_bb_predicate_gimplified_stmts (bb, NULL);
311 set_bb_predicate (bb, boolean_true_node);
314 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
316 static inline void
317 release_bb_predicate (basic_block bb)
319 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
320 if (stmts)
322 /* Ensure that these stmts haven't yet been added to a bb. */
323 if (flag_checking)
324 for (gimple_stmt_iterator i = gsi_start (stmts);
325 !gsi_end_p (i); gsi_next (&i))
326 gcc_assert (! gimple_bb (gsi_stmt (i)));
328 /* Discard them. */
329 gimple_seq_discard (stmts);
330 set_bb_predicate_gimplified_stmts (bb, NULL);
334 /* Free the predicate of basic block BB. */
336 static inline void
337 free_bb_predicate (basic_block bb)
339 if (!bb_has_predicate (bb))
340 return;
342 release_bb_predicate (bb);
343 free (bb->aux);
344 bb->aux = NULL;
347 /* Reinitialize predicate of BB with the true predicate. */
349 static inline void
350 reset_bb_predicate (basic_block bb)
352 if (!bb_has_predicate (bb))
353 init_bb_predicate (bb);
354 else
356 release_bb_predicate (bb);
357 set_bb_predicate (bb, boolean_true_node);
361 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
362 the expression EXPR. Inserts the statement created for this
363 computation before GSI and leaves the iterator GSI at the same
364 statement. */
366 static tree
367 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
369 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
370 gimple *stmt = gimple_build_assign (new_name, expr);
371 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
372 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
373 return new_name;
376 /* Return true when COND is a false predicate. */
378 static inline bool
379 is_false_predicate (tree cond)
381 return (cond != NULL_TREE
382 && (cond == boolean_false_node
383 || integer_zerop (cond)));
386 /* Return true when COND is a true predicate. */
388 static inline bool
389 is_true_predicate (tree cond)
391 return (cond == NULL_TREE
392 || cond == boolean_true_node
393 || integer_onep (cond));
396 /* Returns true when BB has a predicate that is not trivial: true or
397 NULL_TREE. */
399 static inline bool
400 is_predicated (basic_block bb)
402 return !is_true_predicate (bb_predicate (bb));
405 /* Parses the predicate COND and returns its comparison code and
406 operands OP0 and OP1. */
408 static enum tree_code
409 parse_predicate (tree cond, tree *op0, tree *op1)
411 gimple *s;
413 if (TREE_CODE (cond) == SSA_NAME
414 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
416 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
418 *op0 = gimple_assign_rhs1 (s);
419 *op1 = gimple_assign_rhs2 (s);
420 return gimple_assign_rhs_code (s);
423 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
425 tree op = gimple_assign_rhs1 (s);
426 tree type = TREE_TYPE (op);
427 enum tree_code code = parse_predicate (op, op0, op1);
429 return code == ERROR_MARK ? ERROR_MARK
430 : invert_tree_comparison (code, HONOR_NANS (type));
433 return ERROR_MARK;
436 if (COMPARISON_CLASS_P (cond))
438 *op0 = TREE_OPERAND (cond, 0);
439 *op1 = TREE_OPERAND (cond, 1);
440 return TREE_CODE (cond);
443 return ERROR_MARK;
446 /* Returns the fold of predicate C1 OR C2 at location LOC. */
448 static tree
449 fold_or_predicates (location_t loc, tree c1, tree c2)
451 tree op1a, op1b, op2a, op2b;
452 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
453 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
455 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
457 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
458 code2, op2a, op2b);
459 if (t)
460 return t;
463 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
466 /* Returns either a COND_EXPR or the folded expression if the folded
467 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
468 a constant or a SSA_NAME. */
470 static tree
471 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
473 /* If COND is comparison r != 0 and r has boolean type, convert COND
474 to SSA_NAME to accept by vect bool pattern. */
475 if (TREE_CODE (cond) == NE_EXPR)
477 tree op0 = TREE_OPERAND (cond, 0);
478 tree op1 = TREE_OPERAND (cond, 1);
479 if (TREE_CODE (op0) == SSA_NAME
480 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
481 && (integer_zerop (op1)))
482 cond = op0;
485 gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
486 type, cond, rhs, lhs);
487 if (cexpr.resimplify (NULL, follow_all_ssa_edges))
489 if (gimple_simplified_result_is_gimple_val (&cexpr))
490 return cexpr.ops[0];
491 else if (cexpr.code == ABS_EXPR)
492 return build1 (ABS_EXPR, type, cexpr.ops[0]);
493 else if (cexpr.code == MIN_EXPR
494 || cexpr.code == MAX_EXPR)
495 return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
498 return build3 (COND_EXPR, type, cond, rhs, lhs);
501 /* Add condition NC to the predicate list of basic block BB. LOOP is
502 the loop to be if-converted. Use predicate of cd-equivalent block
503 for join bb if it exists: we call basic blocks bb1 and bb2
504 cd-equivalent if they are executed under the same condition. */
506 static inline void
507 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
509 tree bc, *tp;
510 basic_block dom_bb;
512 if (is_true_predicate (nc))
513 return;
515 /* If dominance tells us this basic block is always executed,
516 don't record any predicates for it. */
517 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
518 return;
520 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
521 /* We use notion of cd equivalence to get simpler predicate for
522 join block, e.g. if join block has 2 predecessors with predicates
523 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
524 p1 & p2 | p1 & !p2. */
525 if (dom_bb != loop->header
526 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
528 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
529 bc = bb_predicate (dom_bb);
530 if (!is_true_predicate (bc))
531 set_bb_predicate (bb, bc);
532 else
533 gcc_assert (is_true_predicate (bb_predicate (bb)));
534 if (dump_file && (dump_flags & TDF_DETAILS))
535 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
536 dom_bb->index, bb->index);
537 return;
540 if (!is_predicated (bb))
541 bc = nc;
542 else
544 bc = bb_predicate (bb);
545 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
546 if (is_true_predicate (bc))
548 reset_bb_predicate (bb);
549 return;
553 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
554 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
555 tp = &TREE_OPERAND (bc, 0);
556 else
557 tp = &bc;
558 if (!is_gimple_val (*tp))
560 gimple_seq stmts;
561 *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
562 add_bb_predicate_gimplified_stmts (bb, stmts);
564 set_bb_predicate (bb, bc);
567 /* Add the condition COND to the previous condition PREV_COND, and add
568 this to the predicate list of the destination of edge E. LOOP is
569 the loop to be if-converted. */
571 static void
572 add_to_dst_predicate_list (class loop *loop, edge e,
573 tree prev_cond, tree cond)
575 if (!flow_bb_inside_loop_p (loop, e->dest))
576 return;
578 if (!is_true_predicate (prev_cond))
579 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
580 prev_cond, cond);
582 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
583 add_to_predicate_list (loop, e->dest, cond);
586 /* Return true if one of the successor edges of BB exits LOOP. */
588 static bool
589 bb_with_exit_edge_p (class loop *loop, basic_block bb)
591 edge e;
592 edge_iterator ei;
594 FOR_EACH_EDGE (e, ei, bb->succs)
595 if (loop_exit_edge_p (loop, e))
596 return true;
598 return false;
601 /* Given PHI which has more than two arguments, this function checks if
602 it's if-convertible by degenerating its arguments. Specifically, if
603 below two conditions are satisfied:
605 1) Number of PHI arguments with different values equals to 2 and one
606 argument has the only occurrence.
607 2) The edge corresponding to the unique argument isn't critical edge.
609 Such PHI can be handled as PHIs have only two arguments. For example,
610 below PHI:
612 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
614 can be transformed into:
616 res = (predicate of e3) ? A_2 : A_1;
618 Return TRUE if it is the case, FALSE otherwise. */
620 static bool
621 phi_convertible_by_degenerating_args (gphi *phi)
623 edge e;
624 tree arg, t1 = NULL, t2 = NULL;
625 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
626 unsigned int num_args = gimple_phi_num_args (phi);
628 gcc_assert (num_args > 2);
630 for (i = 0; i < num_args; i++)
632 arg = gimple_phi_arg_def (phi, i);
633 if (t1 == NULL || operand_equal_p (t1, arg, 0))
635 n1++;
636 i1 = i;
637 t1 = arg;
639 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
641 n2++;
642 i2 = i;
643 t2 = arg;
645 else
646 return false;
649 if (n1 != 1 && n2 != 1)
650 return false;
652 /* Check if the edge corresponding to the unique arg is critical. */
653 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
654 if (EDGE_COUNT (e->src->succs) > 1)
655 return false;
657 return true;
660 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
661 and it belongs to basic block BB. Note at this point, it is sure
662 that PHI is if-convertible. This function updates global variable
663 ANY_COMPLICATED_PHI if PHI is complicated. */
665 static bool
666 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
668 if (dump_file && (dump_flags & TDF_DETAILS))
670 fprintf (dump_file, "-------------------------\n");
671 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
674 if (bb != loop->header
675 && gimple_phi_num_args (phi) > 2
676 && !phi_convertible_by_degenerating_args (phi))
677 any_complicated_phi = true;
679 return true;
682 /* Records the status of a data reference. This struct is attached to
683 each DR->aux field. */
685 struct ifc_dr {
686 bool rw_unconditionally;
687 bool w_unconditionally;
688 bool written_at_least_once;
690 tree rw_predicate;
691 tree w_predicate;
692 tree base_w_predicate;
695 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
696 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
697 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
698 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
700 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
701 HASH tables. While storing them in HASH table, it checks if the
702 reference is unconditionally read or written and stores that as a flag
703 information. For base reference it checks if it is written atlest once
704 unconditionally and stores it as flag information along with DR.
705 In other words for every data reference A in STMT there exist other
706 accesses to a data reference with the same base with predicates that
707 add up (OR-up) to the true predicate: this ensures that the data
708 reference A is touched (read or written) on every iteration of the
709 if-converted loop. */
710 static void
711 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
714 data_reference_p *master_dr, *base_master_dr;
715 tree base_ref = DR_BASE_OBJECT (a);
716 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
717 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
718 bool exist1, exist2;
720 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
721 if (!exist1)
722 *master_dr = a;
724 if (DR_IS_WRITE (a))
726 IFC_DR (*master_dr)->w_predicate
727 = fold_or_predicates (UNKNOWN_LOCATION, ca,
728 IFC_DR (*master_dr)->w_predicate);
729 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
730 DR_W_UNCONDITIONALLY (*master_dr) = true;
732 IFC_DR (*master_dr)->rw_predicate
733 = fold_or_predicates (UNKNOWN_LOCATION, ca,
734 IFC_DR (*master_dr)->rw_predicate);
735 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
736 DR_RW_UNCONDITIONALLY (*master_dr) = true;
738 if (DR_IS_WRITE (a))
740 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
741 if (!exist2)
742 *base_master_dr = a;
743 IFC_DR (*base_master_dr)->base_w_predicate
744 = fold_or_predicates (UNKNOWN_LOCATION, ca,
745 IFC_DR (*base_master_dr)->base_w_predicate);
746 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
747 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
751 /* Return TRUE if can prove the index IDX of an array reference REF is
752 within array bound. Return false otherwise. */
754 static bool
755 idx_within_array_bound (tree ref, tree *idx, void *dta)
757 wi::overflow_type overflow;
758 widest_int niter, valid_niter, delta, wi_step;
759 tree ev, init, step;
760 tree low, high;
761 class loop *loop = (class loop*) dta;
763 /* Only support within-bound access for array references. */
764 if (TREE_CODE (ref) != ARRAY_REF)
765 return false;
767 /* For arrays that might have flexible sizes, it is not guaranteed that they
768 do not extend over their declared size. */
769 if (array_ref_flexible_size_p (ref))
770 return false;
772 ev = analyze_scalar_evolution (loop, *idx);
773 ev = instantiate_parameters (loop, ev);
774 init = initial_condition (ev);
775 step = evolution_part_in_loop_num (ev, loop->num);
777 if (!init || TREE_CODE (init) != INTEGER_CST
778 || (step && TREE_CODE (step) != INTEGER_CST))
779 return false;
781 low = array_ref_low_bound (ref);
782 high = array_ref_up_bound (ref);
784 /* The case of nonconstant bounds could be handled, but it would be
785 complicated. */
786 if (TREE_CODE (low) != INTEGER_CST
787 || !high || TREE_CODE (high) != INTEGER_CST)
788 return false;
790 /* Check if the intial idx is within bound. */
791 if (wi::to_widest (init) < wi::to_widest (low)
792 || wi::to_widest (init) > wi::to_widest (high))
793 return false;
795 /* The idx is always within bound. */
796 if (!step || integer_zerop (step))
797 return true;
799 if (!max_loop_iterations (loop, &niter))
800 return false;
802 if (wi::to_widest (step) < 0)
804 delta = wi::to_widest (init) - wi::to_widest (low);
805 wi_step = -wi::to_widest (step);
807 else
809 delta = wi::to_widest (high) - wi::to_widest (init);
810 wi_step = wi::to_widest (step);
813 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
814 /* The iteration space of idx is within array bound. */
815 if (!overflow && niter <= valid_niter)
816 return true;
818 return false;
821 /* Return TRUE if ref is a within bound array reference. */
823 static bool
824 ref_within_array_bound (gimple *stmt, tree ref)
826 class loop *loop = loop_containing_stmt (stmt);
828 gcc_assert (loop != NULL);
829 return for_each_index (&ref, idx_within_array_bound, loop);
833 /* Given a memory reference expression T, return TRUE if base object
834 it refers to is writable. The base object of a memory reference
835 is the main object being referenced, which is returned by function
836 get_base_address. */
838 static bool
839 base_object_writable (tree ref)
841 tree base_tree = get_base_address (ref);
843 return (base_tree
844 && DECL_P (base_tree)
845 && decl_binds_to_current_def_p (base_tree)
846 && !TREE_READONLY (base_tree));
849 /* Return true when the memory references of STMT won't trap in the
850 if-converted code. There are two things that we have to check for:
852 - writes to memory occur to writable memory: if-conversion of
853 memory writes transforms the conditional memory writes into
854 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
855 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
856 be executed at all in the original code, it may be a readonly
857 memory. To check that A is not const-qualified, we check that
858 there exists at least an unconditional write to A in the current
859 function.
861 - reads or writes to memory are valid memory accesses for every
862 iteration. To check that the memory accesses are correctly formed
863 and that we are allowed to read and write in these locations, we
864 check that the memory accesses to be if-converted occur at every
865 iteration unconditionally.
867 Returns true for the memory reference in STMT, same memory reference
868 is read or written unconditionally atleast once and the base memory
869 reference is written unconditionally once. This is to check reference
870 will not write fault. Also retuns true if the memory reference is
871 unconditionally read once then we are conditionally writing to memory
872 which is defined as read and write and is bound to the definition
873 we are seeing. */
874 static bool
875 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
877 /* If DR didn't see a reference here we can't use it to tell
878 whether the ref traps or not. */
879 if (gimple_uid (stmt) == 0)
880 return false;
882 data_reference_p *master_dr, *base_master_dr;
883 data_reference_p a = drs[gimple_uid (stmt) - 1];
885 tree base = DR_BASE_OBJECT (a);
886 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
888 gcc_assert (DR_STMT (a) == stmt);
889 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
890 || DR_INIT (a) || DR_STEP (a));
892 master_dr = innermost_DR_map->get (innermost);
893 gcc_assert (master_dr != NULL);
895 base_master_dr = baseref_DR_map->get (base);
897 /* If a is unconditionally written to it doesn't trap. */
898 if (DR_W_UNCONDITIONALLY (*master_dr))
899 return true;
901 /* If a is unconditionally accessed then ...
903 Even a is conditional access, we can treat it as an unconditional
904 one if it's an array reference and all its index are within array
905 bound. */
906 if (DR_RW_UNCONDITIONALLY (*master_dr)
907 || ref_within_array_bound (stmt, DR_REF (a)))
909 /* an unconditional read won't trap. */
910 if (DR_IS_READ (a))
911 return true;
913 /* an unconditionaly write won't trap if the base is written
914 to unconditionally. */
915 if (base_master_dr
916 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
917 return flag_store_data_races;
918 /* or the base is known to be not readonly. */
919 else if (base_object_writable (DR_REF (a)))
920 return flag_store_data_races;
923 return false;
926 /* Return true if STMT could be converted into a masked load or store
927 (conditional load or store based on a mask computed from bb predicate). */
929 static bool
930 ifcvt_can_use_mask_load_store (gimple *stmt)
932 /* Check whether this is a load or store. */
933 tree lhs = gimple_assign_lhs (stmt);
934 bool is_load;
935 tree ref;
936 if (gimple_store_p (stmt))
938 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
939 return false;
940 is_load = false;
941 ref = lhs;
943 else if (gimple_assign_load_p (stmt))
945 is_load = true;
946 ref = gimple_assign_rhs1 (stmt);
948 else
949 return false;
951 if (may_be_nonaddressable_p (ref))
952 return false;
954 /* Mask should be integer mode of the same size as the load/store
955 mode. */
956 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
957 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
958 return false;
960 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
961 return true;
963 return false;
966 /* Return true if STMT could be converted from an operation that is
967 unconditional to one that is conditional on a bb predicate mask. */
969 static bool
970 ifcvt_can_predicate (gimple *stmt)
972 basic_block bb = gimple_bb (stmt);
974 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
975 || bb->loop_father->dont_vectorize
976 || gimple_has_volatile_ops (stmt))
977 return false;
979 if (gimple_assign_single_p (stmt))
980 return ifcvt_can_use_mask_load_store (stmt);
982 tree_code code = gimple_assign_rhs_code (stmt);
983 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
984 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
985 if (!types_compatible_p (lhs_type, rhs_type))
986 return false;
987 internal_fn cond_fn = get_conditional_internal_fn (code);
988 return (cond_fn != IFN_LAST
989 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
992 /* Return true when STMT is if-convertible.
994 GIMPLE_ASSIGN statement is not if-convertible if,
995 - it is not movable,
996 - it could trap,
997 - LHS is not var decl. */
999 static bool
1000 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1001 vec<data_reference_p> refs)
1003 tree lhs = gimple_assign_lhs (stmt);
1005 if (dump_file && (dump_flags & TDF_DETAILS))
1007 fprintf (dump_file, "-------------------------\n");
1008 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1011 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1012 return false;
1014 /* Some of these constrains might be too conservative. */
1015 if (stmt_ends_bb_p (stmt)
1016 || gimple_has_volatile_ops (stmt)
1017 || (TREE_CODE (lhs) == SSA_NAME
1018 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1019 || gimple_has_side_effects (stmt))
1021 if (dump_file && (dump_flags & TDF_DETAILS))
1022 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1023 return false;
1026 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1027 in between if_convertible_loop_p and combine_blocks
1028 we can perform loop versioning. */
1029 gimple_set_plf (stmt, GF_PLF_2, false);
1031 if ((! gimple_vuse (stmt)
1032 || gimple_could_trap_p_1 (stmt, false, false)
1033 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1034 && gimple_could_trap_p (stmt))
1036 if (ifcvt_can_predicate (stmt))
1038 gimple_set_plf (stmt, GF_PLF_2, true);
1039 need_to_predicate = true;
1040 return true;
1042 if (dump_file && (dump_flags & TDF_DETAILS))
1043 fprintf (dump_file, "tree could trap...\n");
1044 return false;
1046 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1047 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1048 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1049 && arith_code_with_undefined_signed_overflow
1050 (gimple_assign_rhs_code (stmt)))
1051 /* We have to rewrite stmts with undefined overflow. */
1052 need_to_rewrite_undefined = true;
1054 /* When if-converting stores force versioning, likewise if we
1055 ended up generating store data races. */
1056 if (gimple_vdef (stmt))
1057 need_to_predicate = true;
1059 return true;
1062 /* Return true when STMT is if-convertible.
1064 A statement is if-convertible if:
1065 - it is an if-convertible GIMPLE_ASSIGN,
1066 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1067 - it is builtins call,
1068 - it is a call to a function with a SIMD clone. */
1070 static bool
1071 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1073 switch (gimple_code (stmt))
1075 case GIMPLE_LABEL:
1076 case GIMPLE_DEBUG:
1077 case GIMPLE_COND:
1078 return true;
1080 case GIMPLE_ASSIGN:
1081 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1083 case GIMPLE_CALL:
1085 tree fndecl = gimple_call_fndecl (stmt);
1086 if (fndecl)
1088 /* We can vectorize some builtins and functions with SIMD
1089 "inbranch" clones. */
1090 int flags = gimple_call_flags (stmt);
1091 struct cgraph_node *node = cgraph_node::get (fndecl);
1092 if ((flags & ECF_CONST)
1093 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1094 && fndecl_built_in_p (fndecl))
1095 return true;
1096 if (node && node->simd_clones != NULL)
1097 /* Ensure that at least one clone can be "inbranch". */
1098 for (struct cgraph_node *n = node->simd_clones; n != NULL;
1099 n = n->simdclone->next_clone)
1100 if (n->simdclone->inbranch)
1102 gimple_set_plf (stmt, GF_PLF_2, true);
1103 need_to_predicate = true;
1104 return true;
1107 return false;
1110 default:
1111 /* Don't know what to do with 'em so don't do anything. */
1112 if (dump_file && (dump_flags & TDF_DETAILS))
1114 fprintf (dump_file, "don't know what to do\n");
1115 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1117 return false;
1121 /* Assumes that BB has more than 1 predecessors.
1122 Returns false if at least one successor is not on critical edge
1123 and true otherwise. */
1125 static inline bool
1126 all_preds_critical_p (basic_block bb)
1128 edge e;
1129 edge_iterator ei;
1131 FOR_EACH_EDGE (e, ei, bb->preds)
1132 if (EDGE_COUNT (e->src->succs) == 1)
1133 return false;
1134 return true;
1137 /* Return true when BB is if-convertible. This routine does not check
1138 basic block's statements and phis.
1140 A basic block is not if-convertible if:
1141 - it is non-empty and it is after the exit block (in BFS order),
1142 - it is after the exit block but before the latch,
1143 - its edges are not normal.
1145 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1146 inside LOOP. */
1148 static bool
1149 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1151 edge e;
1152 edge_iterator ei;
1154 if (dump_file && (dump_flags & TDF_DETAILS))
1155 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1157 if (EDGE_COUNT (bb->succs) > 2)
1158 return false;
1160 gimple *last = last_stmt (bb);
1161 if (gcall *call = safe_dyn_cast <gcall *> (last))
1162 if (gimple_call_ctrl_altering_p (call))
1163 return false;
1165 if (exit_bb)
1167 if (bb != loop->latch)
1169 if (dump_file && (dump_flags & TDF_DETAILS))
1170 fprintf (dump_file, "basic block after exit bb but before latch\n");
1171 return false;
1173 else if (!empty_block_p (bb))
1175 if (dump_file && (dump_flags & TDF_DETAILS))
1176 fprintf (dump_file, "non empty basic block after exit bb\n");
1177 return false;
1179 else if (bb == loop->latch
1180 && bb != exit_bb
1181 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1183 if (dump_file && (dump_flags & TDF_DETAILS))
1184 fprintf (dump_file, "latch is not dominated by exit_block\n");
1185 return false;
1189 /* Be less adventurous and handle only normal edges. */
1190 FOR_EACH_EDGE (e, ei, bb->succs)
1191 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1193 if (dump_file && (dump_flags & TDF_DETAILS))
1194 fprintf (dump_file, "Difficult to handle edges\n");
1195 return false;
1198 return true;
1201 /* Return true when all predecessor blocks of BB are visited. The
1202 VISITED bitmap keeps track of the visited blocks. */
1204 static bool
1205 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1207 edge e;
1208 edge_iterator ei;
1209 FOR_EACH_EDGE (e, ei, bb->preds)
1210 if (!bitmap_bit_p (*visited, e->src->index))
1211 return false;
1213 return true;
1216 /* Get body of a LOOP in suitable order for if-conversion. It is
1217 caller's responsibility to deallocate basic block list.
1218 If-conversion suitable order is, breadth first sort (BFS) order
1219 with an additional constraint: select a block only if all its
1220 predecessors are already selected. */
1222 static basic_block *
1223 get_loop_body_in_if_conv_order (const class loop *loop)
1225 basic_block *blocks, *blocks_in_bfs_order;
1226 basic_block bb;
1227 bitmap visited;
1228 unsigned int index = 0;
1229 unsigned int visited_count = 0;
1231 gcc_assert (loop->num_nodes);
1232 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1234 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1235 visited = BITMAP_ALLOC (NULL);
1237 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1239 index = 0;
1240 while (index < loop->num_nodes)
1242 bb = blocks_in_bfs_order [index];
1244 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1246 free (blocks_in_bfs_order);
1247 BITMAP_FREE (visited);
1248 free (blocks);
1249 return NULL;
1252 if (!bitmap_bit_p (visited, bb->index))
1254 if (pred_blocks_visited_p (bb, &visited)
1255 || bb == loop->header)
1257 /* This block is now visited. */
1258 bitmap_set_bit (visited, bb->index);
1259 blocks[visited_count++] = bb;
1263 index++;
1265 if (index == loop->num_nodes
1266 && visited_count != loop->num_nodes)
1267 /* Not done yet. */
1268 index = 0;
1270 free (blocks_in_bfs_order);
1271 BITMAP_FREE (visited);
1272 return blocks;
1275 /* Returns true when the analysis of the predicates for all the basic
1276 blocks in LOOP succeeded.
1278 predicate_bbs first allocates the predicates of the basic blocks.
1279 These fields are then initialized with the tree expressions
1280 representing the predicates under which a basic block is executed
1281 in the LOOP. As the loop->header is executed at each iteration, it
1282 has the "true" predicate. Other statements executed under a
1283 condition are predicated with that condition, for example
1285 | if (x)
1286 | S1;
1287 | else
1288 | S2;
1290 S1 will be predicated with "x", and
1291 S2 will be predicated with "!x". */
1293 static void
1294 predicate_bbs (loop_p loop)
1296 unsigned int i;
1298 for (i = 0; i < loop->num_nodes; i++)
1299 init_bb_predicate (ifc_bbs[i]);
1301 for (i = 0; i < loop->num_nodes; i++)
1303 basic_block bb = ifc_bbs[i];
1304 tree cond;
1305 gimple *stmt;
1307 /* The loop latch and loop exit block are always executed and
1308 have no extra conditions to be processed: skip them. */
1309 if (bb == loop->latch
1310 || bb_with_exit_edge_p (loop, bb))
1312 reset_bb_predicate (bb);
1313 continue;
1316 cond = bb_predicate (bb);
1317 stmt = last_stmt (bb);
1318 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1320 tree c2;
1321 edge true_edge, false_edge;
1322 location_t loc = gimple_location (stmt);
1323 tree c;
1324 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1325 conditions can remain unfolded because of multiple uses so
1326 try to re-fold here, especially to get precision changing
1327 conversions sorted out. Do not simply fold the stmt since
1328 this is analysis only. When conditions were embedded in
1329 COND_EXPRs those were folded separately before folding the
1330 COND_EXPR but as they are now outside we have to make sure
1331 to fold them. Do it here - another opportunity would be to
1332 fold predicates as they are inserted. */
1333 gimple_match_op cexpr (gimple_match_cond::UNCOND,
1334 gimple_cond_code (stmt),
1335 boolean_type_node,
1336 gimple_cond_lhs (stmt),
1337 gimple_cond_rhs (stmt));
1338 if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1339 && cexpr.code.is_tree_code ()
1340 && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1341 c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1342 cexpr.ops[0], cexpr.ops[1]);
1343 else
1344 c = build2_loc (loc, gimple_cond_code (stmt),
1345 boolean_type_node,
1346 gimple_cond_lhs (stmt),
1347 gimple_cond_rhs (stmt));
1349 /* Add new condition into destination's predicate list. */
1350 extract_true_false_edges_from_block (gimple_bb (stmt),
1351 &true_edge, &false_edge);
1353 /* If C is true, then TRUE_EDGE is taken. */
1354 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1355 unshare_expr (c));
1357 /* If C is false, then FALSE_EDGE is taken. */
1358 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1359 unshare_expr (c));
1360 add_to_dst_predicate_list (loop, false_edge,
1361 unshare_expr (cond), c2);
1363 cond = NULL_TREE;
1366 /* If current bb has only one successor, then consider it as an
1367 unconditional goto. */
1368 if (single_succ_p (bb))
1370 basic_block bb_n = single_succ (bb);
1372 /* The successor bb inherits the predicate of its
1373 predecessor. If there is no predicate in the predecessor
1374 bb, then consider the successor bb as always executed. */
1375 if (cond == NULL_TREE)
1376 cond = boolean_true_node;
1378 add_to_predicate_list (loop, bb_n, cond);
1382 /* The loop header is always executed. */
1383 reset_bb_predicate (loop->header);
1384 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1385 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1388 /* Build region by adding loop pre-header and post-header blocks. */
1390 static vec<basic_block>
1391 build_region (class loop *loop)
1393 vec<basic_block> region = vNULL;
1394 basic_block exit_bb = NULL;
1396 gcc_assert (ifc_bbs);
1397 /* The first element is loop pre-header. */
1398 region.safe_push (loop_preheader_edge (loop)->src);
1400 for (unsigned int i = 0; i < loop->num_nodes; i++)
1402 basic_block bb = ifc_bbs[i];
1403 region.safe_push (bb);
1404 /* Find loop postheader. */
1405 edge e;
1406 edge_iterator ei;
1407 FOR_EACH_EDGE (e, ei, bb->succs)
1408 if (loop_exit_edge_p (loop, e))
1410 exit_bb = e->dest;
1411 break;
1414 /* The last element is loop post-header. */
1415 gcc_assert (exit_bb);
1416 region.safe_push (exit_bb);
1417 return region;
1420 /* Return true when LOOP is if-convertible. This is a helper function
1421 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1422 in if_convertible_loop_p. */
1424 static bool
1425 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1427 unsigned int i;
1428 basic_block exit_bb = NULL;
1429 vec<basic_block> region;
1431 calculate_dominance_info (CDI_DOMINATORS);
1433 for (i = 0; i < loop->num_nodes; i++)
1435 basic_block bb = ifc_bbs[i];
1437 if (!if_convertible_bb_p (loop, bb, exit_bb))
1438 return false;
1440 if (bb_with_exit_edge_p (loop, bb))
1441 exit_bb = bb;
1444 for (i = 0; i < loop->num_nodes; i++)
1446 basic_block bb = ifc_bbs[i];
1447 gimple_stmt_iterator gsi;
1449 bool may_have_nonlocal_labels
1450 = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1451 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1452 switch (gimple_code (gsi_stmt (gsi)))
1454 case GIMPLE_LABEL:
1455 if (!may_have_nonlocal_labels)
1457 tree label
1458 = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1459 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1460 return false;
1462 /* Fallthru. */
1463 case GIMPLE_ASSIGN:
1464 case GIMPLE_CALL:
1465 case GIMPLE_DEBUG:
1466 case GIMPLE_COND:
1467 gimple_set_uid (gsi_stmt (gsi), 0);
1468 break;
1469 default:
1470 return false;
1474 data_reference_p dr;
1476 innermost_DR_map
1477 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1478 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1480 /* Compute post-dominator tree locally. */
1481 region = build_region (loop);
1482 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1484 predicate_bbs (loop);
1486 /* Free post-dominator tree since it is not used after predication. */
1487 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1488 region.release ();
1490 for (i = 0; refs->iterate (i, &dr); i++)
1492 tree ref = DR_REF (dr);
1494 dr->aux = XNEW (struct ifc_dr);
1495 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1496 DR_RW_UNCONDITIONALLY (dr) = false;
1497 DR_W_UNCONDITIONALLY (dr) = false;
1498 IFC_DR (dr)->rw_predicate = boolean_false_node;
1499 IFC_DR (dr)->w_predicate = boolean_false_node;
1500 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1501 if (gimple_uid (DR_STMT (dr)) == 0)
1502 gimple_set_uid (DR_STMT (dr), i + 1);
1504 /* If DR doesn't have innermost loop behavior or it's a compound
1505 memory reference, we synthesize its innermost loop behavior
1506 for hashing. */
1507 if (TREE_CODE (ref) == COMPONENT_REF
1508 || TREE_CODE (ref) == IMAGPART_EXPR
1509 || TREE_CODE (ref) == REALPART_EXPR
1510 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1511 || DR_INIT (dr) || DR_STEP (dr)))
1513 while (TREE_CODE (ref) == COMPONENT_REF
1514 || TREE_CODE (ref) == IMAGPART_EXPR
1515 || TREE_CODE (ref) == REALPART_EXPR)
1516 ref = TREE_OPERAND (ref, 0);
1518 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1519 DR_BASE_ADDRESS (dr) = ref;
1521 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1524 for (i = 0; i < loop->num_nodes; i++)
1526 basic_block bb = ifc_bbs[i];
1527 gimple_stmt_iterator itr;
1529 /* Check the if-convertibility of statements in predicated BBs. */
1530 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1531 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1532 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1533 return false;
1536 /* Checking PHIs needs to be done after stmts, as the fact whether there
1537 are any masked loads or stores affects the tests. */
1538 for (i = 0; i < loop->num_nodes; i++)
1540 basic_block bb = ifc_bbs[i];
1541 gphi_iterator itr;
1543 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1544 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1545 return false;
1548 if (dump_file)
1549 fprintf (dump_file, "Applying if-conversion\n");
1551 return true;
1554 /* Return true when LOOP is if-convertible.
1555 LOOP is if-convertible if:
1556 - it is innermost,
1557 - it has two or more basic blocks,
1558 - it has only one exit,
1559 - loop header is not the exit edge,
1560 - if its basic blocks and phi nodes are if convertible. */
1562 static bool
1563 if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1565 edge e;
1566 edge_iterator ei;
1567 bool res = false;
1569 /* Handle only innermost loop. */
1570 if (!loop || loop->inner)
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1573 fprintf (dump_file, "not innermost loop\n");
1574 return false;
1577 /* If only one block, no need for if-conversion. */
1578 if (loop->num_nodes <= 2)
1580 if (dump_file && (dump_flags & TDF_DETAILS))
1581 fprintf (dump_file, "less than 2 basic blocks\n");
1582 return false;
1585 /* More than one loop exit is too much to handle. */
1586 if (!single_exit (loop))
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1589 fprintf (dump_file, "multiple exits\n");
1590 return false;
1593 /* If one of the loop header's edge is an exit edge then do not
1594 apply if-conversion. */
1595 FOR_EACH_EDGE (e, ei, loop->header->succs)
1596 if (loop_exit_edge_p (loop, e))
1597 return false;
1599 res = if_convertible_loop_p_1 (loop, refs);
1601 delete innermost_DR_map;
1602 innermost_DR_map = NULL;
1604 delete baseref_DR_map;
1605 baseref_DR_map = NULL;
1607 return res;
1610 /* Return reduc_1 if has_nop.
1612 if (...)
1613 tmp1 = (unsigned type) reduc_1;
1614 tmp2 = tmp1 + rhs2;
1615 reduc_3 = (signed type) tmp2. */
1616 static tree
1617 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1619 if (!has_nop)
1620 return op;
1622 if (TREE_CODE (op) != SSA_NAME)
1623 return NULL_TREE;
1625 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1626 if (!stmt
1627 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1628 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1629 (gimple_assign_rhs1 (stmt))))
1630 return NULL_TREE;
1632 return gimple_assign_rhs1 (stmt);
1635 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1636 which is in predicated basic block.
1637 In fact, the following PHI pattern is searching:
1638 loop-header:
1639 reduc_1 = PHI <..., reduc_2>
1641 if (...)
1642 reduc_3 = ...
1643 reduc_2 = PHI <reduc_1, reduc_3>
1645 ARG_0 and ARG_1 are correspondent PHI arguments.
1646 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1647 EXTENDED is true if PHI has > 2 arguments. */
1649 static bool
1650 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1651 tree *op0, tree *op1, bool extended, bool* has_nop,
1652 gimple **nop_reduc)
1654 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1655 gimple *stmt;
1656 gimple *header_phi = NULL;
1657 enum tree_code reduction_op;
1658 basic_block bb = gimple_bb (phi);
1659 class loop *loop = bb->loop_father;
1660 edge latch_e = loop_latch_edge (loop);
1661 imm_use_iterator imm_iter;
1662 use_operand_p use_p;
1663 edge e;
1664 edge_iterator ei;
1665 bool result = *has_nop = false;
1666 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1667 return false;
1669 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1671 lhs = arg_1;
1672 header_phi = SSA_NAME_DEF_STMT (arg_0);
1673 stmt = SSA_NAME_DEF_STMT (arg_1);
1675 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1677 lhs = arg_0;
1678 header_phi = SSA_NAME_DEF_STMT (arg_1);
1679 stmt = SSA_NAME_DEF_STMT (arg_0);
1681 else
1682 return false;
1683 if (gimple_bb (header_phi) != loop->header)
1684 return false;
1686 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1687 return false;
1689 if (gimple_code (stmt) != GIMPLE_ASSIGN
1690 || gimple_has_volatile_ops (stmt))
1691 return false;
1693 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1694 return false;
1696 if (!is_predicated (gimple_bb (stmt)))
1697 return false;
1699 /* Check that stmt-block is predecessor of phi-block. */
1700 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1701 if (e->dest == bb)
1703 result = true;
1704 break;
1706 if (!result)
1707 return false;
1709 if (!has_single_use (lhs))
1710 return false;
1712 reduction_op = gimple_assign_rhs_code (stmt);
1714 /* Catch something like below
1716 loop-header:
1717 reduc_1 = PHI <..., reduc_2>
1719 if (...)
1720 tmp1 = (unsigned type) reduc_1;
1721 tmp2 = tmp1 + rhs2;
1722 reduc_3 = (signed type) tmp2;
1724 reduc_2 = PHI <reduc_1, reduc_3>
1726 and convert to
1728 reduc_2 = PHI <0, reduc_3>
1729 tmp1 = (unsigned type)reduce_1;
1730 ifcvt = cond_expr ? rhs2 : 0
1731 tmp2 = tmp1 +/- ifcvt;
1732 reduce_1 = (signed type)tmp2; */
1734 if (CONVERT_EXPR_CODE_P (reduction_op))
1736 lhs = gimple_assign_rhs1 (stmt);
1737 if (TREE_CODE (lhs) != SSA_NAME
1738 || !has_single_use (lhs))
1739 return false;
1741 *nop_reduc = stmt;
1742 stmt = SSA_NAME_DEF_STMT (lhs);
1743 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1744 || !is_gimple_assign (stmt))
1745 return false;
1747 *has_nop = true;
1748 reduction_op = gimple_assign_rhs_code (stmt);
1751 if (reduction_op != PLUS_EXPR
1752 && reduction_op != MINUS_EXPR
1753 && reduction_op != MULT_EXPR
1754 && reduction_op != BIT_IOR_EXPR
1755 && reduction_op != BIT_XOR_EXPR
1756 && reduction_op != BIT_AND_EXPR)
1757 return false;
1758 r_op1 = gimple_assign_rhs1 (stmt);
1759 r_op2 = gimple_assign_rhs2 (stmt);
1761 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1762 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1764 /* Make R_OP1 to hold reduction variable. */
1765 if (r_nop2 == PHI_RESULT (header_phi)
1766 && commutative_tree_code (reduction_op))
1768 std::swap (r_op1, r_op2);
1769 std::swap (r_nop1, r_nop2);
1771 else if (r_nop1 != PHI_RESULT (header_phi))
1772 return false;
1774 if (*has_nop)
1776 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1777 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1779 gimple *use_stmt = USE_STMT (use_p);
1780 if (is_gimple_debug (use_stmt))
1781 continue;
1782 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1783 continue;
1784 if (use_stmt != phi)
1785 return false;
1789 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1790 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1792 gimple *use_stmt = USE_STMT (use_p);
1793 if (is_gimple_debug (use_stmt))
1794 continue;
1795 if (use_stmt == stmt)
1796 continue;
1797 if (gimple_code (use_stmt) != GIMPLE_PHI)
1798 return false;
1801 *op0 = r_op1; *op1 = r_op2;
1802 *reduc = stmt;
1803 return true;
1806 /* Converts conditional scalar reduction into unconditional form, e.g.
1807 bb_4
1808 if (_5 != 0) goto bb_5 else goto bb_6
1809 end_bb_4
1810 bb_5
1811 res_6 = res_13 + 1;
1812 end_bb_5
1813 bb_6
1814 # res_2 = PHI <res_13(4), res_6(5)>
1815 end_bb_6
1817 will be converted into sequence
1818 _ifc__1 = _5 != 0 ? 1 : 0;
1819 res_2 = res_13 + _ifc__1;
1820 Argument SWAP tells that arguments of conditional expression should be
1821 swapped.
1822 Returns rhs of resulting PHI assignment. */
1824 static tree
1825 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1826 tree cond, tree op0, tree op1, bool swap,
1827 bool has_nop, gimple* nop_reduc)
1829 gimple_stmt_iterator stmt_it;
1830 gimple *new_assign;
1831 tree rhs;
1832 tree rhs1 = gimple_assign_rhs1 (reduc);
1833 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1834 tree c;
1835 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
1836 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1837 gimple_seq stmts = NULL;
1839 if (dump_file && (dump_flags & TDF_DETAILS))
1841 fprintf (dump_file, "Found cond scalar reduction.\n");
1842 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1845 /* Build cond expression using COND and constant operand
1846 of reduction rhs. */
1847 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1848 unshare_expr (cond),
1849 swap ? op_nochange : op1,
1850 swap ? op1 : op_nochange);
1852 /* Create assignment stmt and insert it at GSI. */
1853 new_assign = gimple_build_assign (tmp, c);
1854 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1855 /* Build rhs for unconditional increment/decrement/logic_operation. */
1856 rhs = gimple_build (&stmts, reduction_op,
1857 TREE_TYPE (rhs1), op0, tmp);
1859 if (has_nop)
1861 rhs = gimple_convert (&stmts,
1862 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1863 stmt_it = gsi_for_stmt (nop_reduc);
1864 gsi_remove (&stmt_it, true);
1865 release_defs (nop_reduc);
1867 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1869 /* Delete original reduction stmt. */
1870 stmt_it = gsi_for_stmt (reduc);
1871 gsi_remove (&stmt_it, true);
1872 release_defs (reduc);
1873 return rhs;
1876 /* Produce condition for all occurrences of ARG in PHI node. */
1878 static tree
1879 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1880 gimple_stmt_iterator *gsi)
1882 int len;
1883 int i;
1884 tree cond = NULL_TREE;
1885 tree c;
1886 edge e;
1888 len = occur->length ();
1889 gcc_assert (len > 0);
1890 for (i = 0; i < len; i++)
1892 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1893 c = bb_predicate (e->src);
1894 if (is_true_predicate (c))
1896 cond = c;
1897 break;
1899 c = force_gimple_operand_gsi (gsi, unshare_expr (c),
1900 true, NULL_TREE, true, GSI_SAME_STMT);
1901 if (cond != NULL_TREE)
1903 /* Must build OR expression. */
1904 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1905 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1906 NULL_TREE, true, GSI_SAME_STMT);
1908 else
1909 cond = c;
1911 gcc_assert (cond != NULL_TREE);
1912 return cond;
1915 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1916 This routine can handle PHI nodes with more than two arguments.
1918 For example,
1919 S1: A = PHI <x1(1), x2(5)>
1920 is converted into,
1921 S2: A = cond ? x1 : x2;
1923 The generated code is inserted at GSI that points to the top of
1924 basic block's statement list.
1925 If PHI node has more than two arguments a chain of conditional
1926 expression is produced. */
1929 static void
1930 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1932 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1933 tree rhs, res, arg0, arg1, op0, op1, scev;
1934 tree cond;
1935 unsigned int index0;
1936 unsigned int max, args_len;
1937 edge e;
1938 basic_block bb;
1939 unsigned int i;
1940 bool has_nop;
1942 res = gimple_phi_result (phi);
1943 if (virtual_operand_p (res))
1944 return;
1946 if ((rhs = degenerate_phi_result (phi))
1947 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1948 res))
1949 && !chrec_contains_undetermined (scev)
1950 && scev != res
1951 && (rhs = gimple_phi_arg_def (phi, 0))))
1953 if (dump_file && (dump_flags & TDF_DETAILS))
1955 fprintf (dump_file, "Degenerate phi!\n");
1956 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1958 new_stmt = gimple_build_assign (res, rhs);
1959 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1960 update_stmt (new_stmt);
1961 return;
1964 bb = gimple_bb (phi);
1965 if (EDGE_COUNT (bb->preds) == 2)
1967 /* Predicate ordinary PHI node with 2 arguments. */
1968 edge first_edge, second_edge;
1969 basic_block true_bb;
1970 first_edge = EDGE_PRED (bb, 0);
1971 second_edge = EDGE_PRED (bb, 1);
1972 cond = bb_predicate (first_edge->src);
1973 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1974 std::swap (first_edge, second_edge);
1975 if (EDGE_COUNT (first_edge->src->succs) > 1)
1977 cond = bb_predicate (second_edge->src);
1978 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1979 cond = TREE_OPERAND (cond, 0);
1980 else
1981 first_edge = second_edge;
1983 else
1984 cond = bb_predicate (first_edge->src);
1985 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1986 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1987 NULL_TREE, true, GSI_SAME_STMT);
1988 true_bb = first_edge->src;
1989 if (EDGE_PRED (bb, 1)->src == true_bb)
1991 arg0 = gimple_phi_arg_def (phi, 1);
1992 arg1 = gimple_phi_arg_def (phi, 0);
1994 else
1996 arg0 = gimple_phi_arg_def (phi, 0);
1997 arg1 = gimple_phi_arg_def (phi, 1);
1999 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2000 &op0, &op1, false, &has_nop,
2001 &nop_reduc))
2003 /* Convert reduction stmt into vectorizable form. */
2004 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2005 true_bb != gimple_bb (reduc),
2006 has_nop, nop_reduc);
2007 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2009 else
2010 /* Build new RHS using selected condition and arguments. */
2011 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2012 arg0, arg1);
2013 new_stmt = gimple_build_assign (res, rhs);
2014 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2015 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2016 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2018 new_stmt = gsi_stmt (new_gsi);
2019 update_stmt (new_stmt);
2022 if (dump_file && (dump_flags & TDF_DETAILS))
2024 fprintf (dump_file, "new phi replacement stmt\n");
2025 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2027 return;
2030 /* Create hashmap for PHI node which contain vector of argument indexes
2031 having the same value. */
2032 bool swap = false;
2033 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2034 unsigned int num_args = gimple_phi_num_args (phi);
2035 int max_ind = -1;
2036 /* Vector of different PHI argument values. */
2037 auto_vec<tree> args (num_args);
2039 /* Compute phi_arg_map. */
2040 for (i = 0; i < num_args; i++)
2042 tree arg;
2044 arg = gimple_phi_arg_def (phi, i);
2045 if (!phi_arg_map.get (arg))
2046 args.quick_push (arg);
2047 phi_arg_map.get_or_insert (arg).safe_push (i);
2050 /* Determine element with max number of occurrences. */
2051 max_ind = -1;
2052 max = 1;
2053 args_len = args.length ();
2054 for (i = 0; i < args_len; i++)
2056 unsigned int len;
2057 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2059 max_ind = (int) i;
2060 max = len;
2064 /* Put element with max number of occurences to the end of ARGS. */
2065 if (max_ind != -1 && max_ind +1 != (int) args_len)
2066 std::swap (args[args_len - 1], args[max_ind]);
2068 /* Handle one special case when number of arguments with different values
2069 is equal 2 and one argument has the only occurrence. Such PHI can be
2070 handled as if would have only 2 arguments. */
2071 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2073 vec<int> *indexes;
2074 indexes = phi_arg_map.get (args[0]);
2075 index0 = (*indexes)[0];
2076 arg0 = args[0];
2077 arg1 = args[1];
2078 e = gimple_phi_arg_edge (phi, index0);
2079 cond = bb_predicate (e->src);
2080 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2082 swap = true;
2083 cond = TREE_OPERAND (cond, 0);
2085 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2086 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2087 NULL_TREE, true, GSI_SAME_STMT);
2088 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2089 &op0, &op1, true, &has_nop, &nop_reduc)))
2090 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2091 swap? arg1 : arg0,
2092 swap? arg0 : arg1);
2093 else
2095 /* Convert reduction stmt into vectorizable form. */
2096 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2097 swap,has_nop, nop_reduc);
2098 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2100 new_stmt = gimple_build_assign (res, rhs);
2101 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2102 update_stmt (new_stmt);
2104 else
2106 /* Common case. */
2107 vec<int> *indexes;
2108 tree type = TREE_TYPE (gimple_phi_result (phi));
2109 tree lhs;
2110 arg1 = args[1];
2111 for (i = 0; i < args_len; i++)
2113 arg0 = args[i];
2114 indexes = phi_arg_map.get (args[i]);
2115 if (i != args_len - 1)
2116 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2117 else
2118 lhs = res;
2119 cond = gen_phi_arg_condition (phi, indexes, gsi);
2120 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2121 arg0, arg1);
2122 new_stmt = gimple_build_assign (lhs, rhs);
2123 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2124 update_stmt (new_stmt);
2125 arg1 = lhs;
2129 if (dump_file && (dump_flags & TDF_DETAILS))
2131 fprintf (dump_file, "new extended phi replacement stmt\n");
2132 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2136 /* Replaces in LOOP all the scalar phi nodes other than those in the
2137 LOOP->header block with conditional modify expressions. */
2139 static void
2140 predicate_all_scalar_phis (class loop *loop)
2142 basic_block bb;
2143 unsigned int orig_loop_num_nodes = loop->num_nodes;
2144 unsigned int i;
2146 for (i = 1; i < orig_loop_num_nodes; i++)
2148 gphi *phi;
2149 gimple_stmt_iterator gsi;
2150 gphi_iterator phi_gsi;
2151 bb = ifc_bbs[i];
2153 if (bb == loop->header)
2154 continue;
2156 phi_gsi = gsi_start_phis (bb);
2157 if (gsi_end_p (phi_gsi))
2158 continue;
2160 gsi = gsi_after_labels (bb);
2161 while (!gsi_end_p (phi_gsi))
2163 phi = phi_gsi.phi ();
2164 if (virtual_operand_p (gimple_phi_result (phi)))
2165 gsi_next (&phi_gsi);
2166 else
2168 predicate_scalar_phi (phi, &gsi);
2169 remove_phi_node (&phi_gsi, false);
2175 /* Insert in each basic block of LOOP the statements produced by the
2176 gimplification of the predicates. */
2178 static void
2179 insert_gimplified_predicates (loop_p loop)
2181 unsigned int i;
2183 for (i = 0; i < loop->num_nodes; i++)
2185 basic_block bb = ifc_bbs[i];
2186 gimple_seq stmts;
2187 if (!is_predicated (bb))
2188 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2189 if (!is_predicated (bb))
2191 /* Do not insert statements for a basic block that is not
2192 predicated. Also make sure that the predicate of the
2193 basic block is set to true. */
2194 reset_bb_predicate (bb);
2195 continue;
2198 stmts = bb_predicate_gimplified_stmts (bb);
2199 if (stmts)
2201 if (need_to_predicate)
2203 /* Insert the predicate of the BB just after the label,
2204 as the if-conversion of memory writes will use this
2205 predicate. */
2206 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2207 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2209 else
2211 /* Insert the predicate of the BB at the end of the BB
2212 as this would reduce the register pressure: the only
2213 use of this predicate will be in successor BBs. */
2214 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2216 if (gsi_end_p (gsi)
2217 || stmt_ends_bb_p (gsi_stmt (gsi)))
2218 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2219 else
2220 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2223 /* Once the sequence is code generated, set it to NULL. */
2224 set_bb_predicate_gimplified_stmts (bb, NULL);
2229 /* Helper function for predicate_statements. Returns index of existent
2230 mask if it was created for given SIZE and -1 otherwise. */
2232 static int
2233 mask_exists (int size, const vec<int> &vec)
2235 unsigned int ix;
2236 int v;
2237 FOR_EACH_VEC_ELT (vec, ix, v)
2238 if (v == size)
2239 return (int) ix;
2240 return -1;
2243 /* Helper function for predicate_statements. STMT is a memory read or
2244 write and it needs to be predicated by MASK. Return a statement
2245 that does so. */
2247 static gimple *
2248 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2250 gcall *new_stmt;
2252 tree lhs = gimple_assign_lhs (stmt);
2253 tree rhs = gimple_assign_rhs1 (stmt);
2254 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2255 mark_addressable (ref);
2256 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2257 true, NULL_TREE, true, GSI_SAME_STMT);
2258 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2259 get_object_alignment (ref));
2260 /* Copy points-to info if possible. */
2261 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2262 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2263 ref);
2264 if (TREE_CODE (lhs) == SSA_NAME)
2266 new_stmt
2267 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2268 ptr, mask);
2269 gimple_call_set_lhs (new_stmt, lhs);
2270 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2272 else
2274 new_stmt
2275 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2276 mask, rhs);
2277 gimple_move_vops (new_stmt, stmt);
2279 gimple_call_set_nothrow (new_stmt, true);
2280 return new_stmt;
2283 /* STMT uses OP_LHS. Check whether it is equivalent to:
2285 ... = OP_MASK ? OP_LHS : X;
2287 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2288 known to have value OP_COND. */
2290 static tree
2291 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2292 tree op_lhs)
2294 gassign *assign = dyn_cast <gassign *> (stmt);
2295 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2296 return NULL_TREE;
2298 tree use_cond = gimple_assign_rhs1 (assign);
2299 tree if_true = gimple_assign_rhs2 (assign);
2300 tree if_false = gimple_assign_rhs3 (assign);
2302 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2303 && if_true == op_lhs)
2304 return if_false;
2306 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2307 return if_true;
2309 return NULL_TREE;
2312 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2313 the set of SSA names defined earlier in STMT's block. */
2315 static bool
2316 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2317 tree value)
2319 if (is_gimple_min_invariant (value))
2320 return true;
2322 if (TREE_CODE (value) == SSA_NAME)
2324 if (SSA_NAME_IS_DEFAULT_DEF (value))
2325 return true;
2327 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2328 basic_block use_bb = gimple_bb (stmt);
2329 return (def_bb == use_bb
2330 ? ssa_names->contains (value)
2331 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2334 return false;
2337 /* Helper function for predicate_statements. STMT is a potentially-trapping
2338 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2339 has value COND. Return a statement that does so. SSA_NAMES is the set of
2340 SSA names defined earlier in STMT's block. */
2342 static gimple *
2343 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2344 hash_set<tree_ssa_name_hash> *ssa_names)
2346 tree lhs = gimple_assign_lhs (stmt);
2347 tree_code code = gimple_assign_rhs_code (stmt);
2348 unsigned int nops = gimple_num_ops (stmt);
2349 internal_fn cond_fn = get_conditional_internal_fn (code);
2351 /* Construct the arguments to the conditional internal function. */
2352 auto_vec<tree, 8> args;
2353 args.safe_grow (nops + 1, true);
2354 args[0] = mask;
2355 for (unsigned int i = 1; i < nops; ++i)
2356 args[i] = gimple_op (stmt, i);
2357 args[nops] = NULL_TREE;
2359 /* Look for uses of the result to see whether they are COND_EXPRs that can
2360 be folded into the conditional call. */
2361 imm_use_iterator imm_iter;
2362 gimple *use_stmt;
2363 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2365 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2366 if (new_else && value_available_p (stmt, ssa_names, new_else))
2368 if (!args[nops])
2369 args[nops] = new_else;
2370 if (operand_equal_p (new_else, args[nops], 0))
2372 /* We have:
2374 LHS = IFN_COND (MASK, ..., ELSE);
2375 X = MASK ? LHS : ELSE;
2377 which makes X equivalent to LHS. */
2378 tree use_lhs = gimple_assign_lhs (use_stmt);
2379 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2383 if (!args[nops])
2384 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2385 nops - 1, &args[1]);
2387 /* Create and insert the call. */
2388 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2389 gimple_call_set_lhs (new_stmt, lhs);
2390 gimple_call_set_nothrow (new_stmt, true);
2392 return new_stmt;
2395 /* Predicate each write to memory in LOOP.
2397 This function transforms control flow constructs containing memory
2398 writes of the form:
2400 | for (i = 0; i < N; i++)
2401 | if (cond)
2402 | A[i] = expr;
2404 into the following form that does not contain control flow:
2406 | for (i = 0; i < N; i++)
2407 | A[i] = cond ? expr : A[i];
2409 The original CFG looks like this:
2411 | bb_0
2412 | i = 0
2413 | end_bb_0
2415 | bb_1
2416 | if (i < N) goto bb_5 else goto bb_2
2417 | end_bb_1
2419 | bb_2
2420 | cond = some_computation;
2421 | if (cond) goto bb_3 else goto bb_4
2422 | end_bb_2
2424 | bb_3
2425 | A[i] = expr;
2426 | goto bb_4
2427 | end_bb_3
2429 | bb_4
2430 | goto bb_1
2431 | end_bb_4
2433 insert_gimplified_predicates inserts the computation of the COND
2434 expression at the beginning of the destination basic block:
2436 | bb_0
2437 | i = 0
2438 | end_bb_0
2440 | bb_1
2441 | if (i < N) goto bb_5 else goto bb_2
2442 | end_bb_1
2444 | bb_2
2445 | cond = some_computation;
2446 | if (cond) goto bb_3 else goto bb_4
2447 | end_bb_2
2449 | bb_3
2450 | cond = some_computation;
2451 | A[i] = expr;
2452 | goto bb_4
2453 | end_bb_3
2455 | bb_4
2456 | goto bb_1
2457 | end_bb_4
2459 predicate_statements is then predicating the memory write as follows:
2461 | bb_0
2462 | i = 0
2463 | end_bb_0
2465 | bb_1
2466 | if (i < N) goto bb_5 else goto bb_2
2467 | end_bb_1
2469 | bb_2
2470 | if (cond) goto bb_3 else goto bb_4
2471 | end_bb_2
2473 | bb_3
2474 | cond = some_computation;
2475 | A[i] = cond ? expr : A[i];
2476 | goto bb_4
2477 | end_bb_3
2479 | bb_4
2480 | goto bb_1
2481 | end_bb_4
2483 and finally combine_blocks removes the basic block boundaries making
2484 the loop vectorizable:
2486 | bb_0
2487 | i = 0
2488 | if (i < N) goto bb_5 else goto bb_1
2489 | end_bb_0
2491 | bb_1
2492 | cond = some_computation;
2493 | A[i] = cond ? expr : A[i];
2494 | if (i < N) goto bb_5 else goto bb_4
2495 | end_bb_1
2497 | bb_4
2498 | goto bb_1
2499 | end_bb_4
2502 static void
2503 predicate_statements (loop_p loop)
2505 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2506 auto_vec<int, 1> vect_sizes;
2507 auto_vec<tree, 1> vect_masks;
2508 hash_set<tree_ssa_name_hash> ssa_names;
2510 for (i = 1; i < orig_loop_num_nodes; i++)
2512 gimple_stmt_iterator gsi;
2513 basic_block bb = ifc_bbs[i];
2514 tree cond = bb_predicate (bb);
2515 bool swap;
2516 int index;
2518 if (is_true_predicate (cond))
2519 continue;
2521 swap = false;
2522 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2524 swap = true;
2525 cond = TREE_OPERAND (cond, 0);
2528 vect_sizes.truncate (0);
2529 vect_masks.truncate (0);
2531 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2533 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2534 tree lhs;
2535 if (!stmt)
2537 else if (is_false_predicate (cond)
2538 && gimple_vdef (stmt))
2540 unlink_stmt_vdef (stmt);
2541 gsi_remove (&gsi, true);
2542 release_defs (stmt);
2543 continue;
2545 else if (gimple_plf (stmt, GF_PLF_2)
2546 && is_gimple_assign (stmt))
2548 tree lhs = gimple_assign_lhs (stmt);
2549 tree mask;
2550 gimple *new_stmt;
2551 gimple_seq stmts = NULL;
2552 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2553 /* We checked before setting GF_PLF_2 that an equivalent
2554 integer mode exists. */
2555 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2556 if (!vect_sizes.is_empty ()
2557 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2558 /* Use created mask. */
2559 mask = vect_masks[index];
2560 else
2562 if (COMPARISON_CLASS_P (cond))
2563 mask = gimple_build (&stmts, TREE_CODE (cond),
2564 boolean_type_node,
2565 TREE_OPERAND (cond, 0),
2566 TREE_OPERAND (cond, 1));
2567 else
2568 mask = cond;
2570 if (swap)
2572 tree true_val
2573 = constant_boolean_node (true, TREE_TYPE (mask));
2574 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2575 TREE_TYPE (mask), mask, true_val);
2577 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2579 /* Save mask and its size for further use. */
2580 vect_sizes.safe_push (bitsize);
2581 vect_masks.safe_push (mask);
2583 if (gimple_assign_single_p (stmt))
2584 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2585 else
2586 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2588 gsi_replace (&gsi, new_stmt, true);
2590 else if (((lhs = gimple_assign_lhs (stmt)), true)
2591 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2592 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2593 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2594 && arith_code_with_undefined_signed_overflow
2595 (gimple_assign_rhs_code (stmt)))
2597 gsi_remove (&gsi, true);
2598 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2599 bool first = true;
2600 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2601 !gsi_end_p (gsi2);)
2603 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2604 gsi_remove (&gsi2, false);
2605 if (first)
2607 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2608 first = false;
2610 else
2611 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2614 else if (gimple_vdef (stmt))
2616 tree lhs = gimple_assign_lhs (stmt);
2617 tree rhs = gimple_assign_rhs1 (stmt);
2618 tree type = TREE_TYPE (lhs);
2620 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2621 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2622 if (swap)
2623 std::swap (lhs, rhs);
2624 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2625 NULL_TREE, true, GSI_SAME_STMT);
2626 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2627 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2628 update_stmt (stmt);
2630 else if (gimple_plf (stmt, GF_PLF_2)
2631 && is_gimple_call (stmt))
2633 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2634 This will cause the vectorizer to match the "in branch"
2635 clone variants, and serves to build the mask vector
2636 in a natural way. */
2637 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2638 tree orig_fn = gimple_call_fn (call);
2639 int orig_nargs = gimple_call_num_args (call);
2640 auto_vec<tree> args;
2641 args.safe_push (orig_fn);
2642 for (int i = 0; i < orig_nargs; i++)
2643 args.safe_push (gimple_call_arg (call, i));
2644 args.safe_push (cond);
2646 /* Replace the call with a IFN_MASK_CALL that has the extra
2647 condition parameter. */
2648 gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
2649 args);
2650 gimple_call_set_lhs (new_call, gimple_call_lhs (call));
2651 gsi_replace (&gsi, new_call, true);
2654 lhs = gimple_get_lhs (gsi_stmt (gsi));
2655 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2656 ssa_names.add (lhs);
2657 gsi_next (&gsi);
2659 ssa_names.empty ();
2663 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2664 other than the exit and latch of the LOOP. Also resets the
2665 GIMPLE_DEBUG information. */
2667 static void
2668 remove_conditions_and_labels (loop_p loop)
2670 gimple_stmt_iterator gsi;
2671 unsigned int i;
2673 for (i = 0; i < loop->num_nodes; i++)
2675 basic_block bb = ifc_bbs[i];
2677 if (bb_with_exit_edge_p (loop, bb)
2678 || bb == loop->latch)
2679 continue;
2681 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2682 switch (gimple_code (gsi_stmt (gsi)))
2684 case GIMPLE_COND:
2685 case GIMPLE_LABEL:
2686 gsi_remove (&gsi, true);
2687 break;
2689 case GIMPLE_DEBUG:
2690 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2691 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2693 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2694 update_stmt (gsi_stmt (gsi));
2696 gsi_next (&gsi);
2697 break;
2699 default:
2700 gsi_next (&gsi);
2705 /* Combine all the basic blocks from LOOP into one or two super basic
2706 blocks. Replace PHI nodes with conditional modify expressions. */
2708 static void
2709 combine_blocks (class loop *loop)
2711 basic_block bb, exit_bb, merge_target_bb;
2712 unsigned int orig_loop_num_nodes = loop->num_nodes;
2713 unsigned int i;
2714 edge e;
2715 edge_iterator ei;
2717 remove_conditions_and_labels (loop);
2718 insert_gimplified_predicates (loop);
2719 predicate_all_scalar_phis (loop);
2721 if (need_to_predicate || need_to_rewrite_undefined)
2722 predicate_statements (loop);
2724 /* Merge basic blocks. */
2725 exit_bb = NULL;
2726 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2727 for (i = 0; i < orig_loop_num_nodes; i++)
2729 bb = ifc_bbs[i];
2730 predicated[i] = !is_true_predicate (bb_predicate (bb));
2731 free_bb_predicate (bb);
2732 if (bb_with_exit_edge_p (loop, bb))
2734 gcc_assert (exit_bb == NULL);
2735 exit_bb = bb;
2738 gcc_assert (exit_bb != loop->latch);
2740 merge_target_bb = loop->header;
2742 /* Get at the virtual def valid for uses starting at the first block
2743 we merge into the header. Without a virtual PHI the loop has the
2744 same virtual use on all stmts. */
2745 gphi *vphi = get_virtual_phi (loop->header);
2746 tree last_vdef = NULL_TREE;
2747 if (vphi)
2749 last_vdef = gimple_phi_result (vphi);
2750 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2751 ! gsi_end_p (gsi); gsi_next (&gsi))
2752 if (gimple_vdef (gsi_stmt (gsi)))
2753 last_vdef = gimple_vdef (gsi_stmt (gsi));
2755 for (i = 1; i < orig_loop_num_nodes; i++)
2757 gimple_stmt_iterator gsi;
2758 gimple_stmt_iterator last;
2760 bb = ifc_bbs[i];
2762 if (bb == exit_bb || bb == loop->latch)
2763 continue;
2765 /* We release virtual PHIs late because we have to propagate them
2766 out using the current VUSE. The def might be the one used
2767 after the loop. */
2768 vphi = get_virtual_phi (bb);
2769 if (vphi)
2771 /* When there's just loads inside the loop a stray virtual
2772 PHI merging the uses can appear, update last_vdef from
2773 it. */
2774 if (!last_vdef)
2775 last_vdef = gimple_phi_arg_def (vphi, 0);
2776 imm_use_iterator iter;
2777 use_operand_p use_p;
2778 gimple *use_stmt;
2779 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2781 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2782 SET_USE (use_p, last_vdef);
2784 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2785 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2786 gsi = gsi_for_stmt (vphi);
2787 remove_phi_node (&gsi, true);
2790 /* Make stmts member of loop->header and clear range info from all stmts
2791 in BB which is now no longer executed conditional on a predicate we
2792 could have derived it from. */
2793 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2795 gimple *stmt = gsi_stmt (gsi);
2796 gimple_set_bb (stmt, merge_target_bb);
2797 /* Update virtual operands. */
2798 if (last_vdef)
2800 use_operand_p use_p = ssa_vuse_operand (stmt);
2801 if (use_p
2802 && USE_FROM_PTR (use_p) != last_vdef)
2803 SET_USE (use_p, last_vdef);
2804 if (gimple_vdef (stmt))
2805 last_vdef = gimple_vdef (stmt);
2807 else
2808 /* If this is the first load we arrive at update last_vdef
2809 so we handle stray PHIs correctly. */
2810 last_vdef = gimple_vuse (stmt);
2811 if (predicated[i])
2813 ssa_op_iter i;
2814 tree op;
2815 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2816 reset_flow_sensitive_info (op);
2820 /* Update stmt list. */
2821 last = gsi_last_bb (merge_target_bb);
2822 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2823 set_bb_seq (bb, NULL);
2826 /* Fixup virtual operands in the exit block. */
2827 if (exit_bb
2828 && exit_bb != loop->header)
2830 /* We release virtual PHIs late because we have to propagate them
2831 out using the current VUSE. The def might be the one used
2832 after the loop. */
2833 vphi = get_virtual_phi (exit_bb);
2834 if (vphi)
2836 /* When there's just loads inside the loop a stray virtual
2837 PHI merging the uses can appear, update last_vdef from
2838 it. */
2839 if (!last_vdef)
2840 last_vdef = gimple_phi_arg_def (vphi, 0);
2841 imm_use_iterator iter;
2842 use_operand_p use_p;
2843 gimple *use_stmt;
2844 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2846 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2847 SET_USE (use_p, last_vdef);
2849 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2850 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2851 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2852 remove_phi_node (&gsi, true);
2856 /* Now remove all the edges in the loop, except for those from the exit
2857 block and delete the blocks we elided. */
2858 for (i = 1; i < orig_loop_num_nodes; i++)
2860 bb = ifc_bbs[i];
2862 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2864 if (e->src == exit_bb)
2865 ei_next (&ei);
2866 else
2867 remove_edge (e);
2870 for (i = 1; i < orig_loop_num_nodes; i++)
2872 bb = ifc_bbs[i];
2874 if (bb == exit_bb || bb == loop->latch)
2875 continue;
2877 delete_basic_block (bb);
2880 /* Re-connect the exit block. */
2881 if (exit_bb != NULL)
2883 if (exit_bb != loop->header)
2885 /* Connect this node to loop header. */
2886 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2887 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2890 /* Redirect non-exit edges to loop->latch. */
2891 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2893 if (!loop_exit_edge_p (loop, e))
2894 redirect_edge_and_branch (e, loop->latch);
2896 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2898 else
2900 /* If the loop does not have an exit, reconnect header and latch. */
2901 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2902 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2905 /* If possible, merge loop header to the block with the exit edge.
2906 This reduces the number of basic blocks to two, to please the
2907 vectorizer that handles only loops with two nodes. */
2908 if (exit_bb
2909 && exit_bb != loop->header)
2911 if (can_merge_blocks_p (loop->header, exit_bb))
2912 merge_blocks (loop->header, exit_bb);
2915 free (ifc_bbs);
2916 ifc_bbs = NULL;
2917 free (predicated);
2920 /* Version LOOP before if-converting it; the original loop
2921 will be if-converted, the new copy of the loop will not,
2922 and the LOOP_VECTORIZED internal call will be guarding which
2923 loop to execute. The vectorizer pass will fold this
2924 internal call into either true or false.
2926 Note that this function intentionally invalidates profile. Both edges
2927 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2928 consistent after the condition is folded in the vectorizer. */
2930 static class loop *
2931 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2933 basic_block cond_bb;
2934 tree cond = make_ssa_name (boolean_type_node);
2935 class loop *new_loop;
2936 gimple *g;
2937 gimple_stmt_iterator gsi;
2938 unsigned int save_length = 0;
2940 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2941 build_int_cst (integer_type_node, loop->num),
2942 integer_zero_node);
2943 gimple_call_set_lhs (g, cond);
2945 void **saved_preds = NULL;
2946 if (any_complicated_phi || need_to_predicate)
2948 /* Save BB->aux around loop_version as that uses the same field. */
2949 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2950 saved_preds = XALLOCAVEC (void *, save_length);
2951 for (unsigned i = 0; i < save_length; i++)
2952 saved_preds[i] = ifc_bbs[i]->aux;
2955 initialize_original_copy_tables ();
2956 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2957 is re-merged in the vectorizer. */
2958 new_loop = loop_version (loop, cond, &cond_bb,
2959 profile_probability::always (),
2960 profile_probability::always (),
2961 profile_probability::always (),
2962 profile_probability::always (), true);
2963 free_original_copy_tables ();
2965 if (any_complicated_phi || need_to_predicate)
2966 for (unsigned i = 0; i < save_length; i++)
2967 ifc_bbs[i]->aux = saved_preds[i];
2969 if (new_loop == NULL)
2970 return NULL;
2972 new_loop->dont_vectorize = true;
2973 new_loop->force_vectorize = false;
2974 gsi = gsi_last_bb (cond_bb);
2975 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2976 if (preds)
2977 preds->safe_push (g);
2978 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2979 update_ssa (TODO_update_ssa_no_phi);
2980 return new_loop;
2983 /* Return true when LOOP satisfies the follow conditions that will
2984 allow it to be recognized by the vectorizer for outer-loop
2985 vectorization:
2986 - The loop is not the root node of the loop tree.
2987 - The loop has exactly one inner loop.
2988 - The loop has a single exit.
2989 - The loop header has a single successor, which is the inner
2990 loop header.
2991 - Each of the inner and outer loop latches have a single
2992 predecessor.
2993 - The loop exit block has a single predecessor, which is the
2994 inner loop's exit block. */
2996 static bool
2997 versionable_outer_loop_p (class loop *loop)
2999 if (!loop_outer (loop)
3000 || loop->dont_vectorize
3001 || !loop->inner
3002 || loop->inner->next
3003 || !single_exit (loop)
3004 || !single_succ_p (loop->header)
3005 || single_succ (loop->header) != loop->inner->header
3006 || !single_pred_p (loop->latch)
3007 || !single_pred_p (loop->inner->latch))
3008 return false;
3010 basic_block outer_exit = single_pred (loop->latch);
3011 basic_block inner_exit = single_pred (loop->inner->latch);
3013 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3014 return false;
3016 if (dump_file)
3017 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3019 return true;
3022 /* Performs splitting of critical edges. Skip splitting and return false
3023 if LOOP will not be converted because:
3025 - LOOP is not well formed.
3026 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3028 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3030 static bool
3031 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3033 basic_block *body;
3034 basic_block bb;
3035 unsigned int num = loop->num_nodes;
3036 unsigned int i;
3037 gimple *stmt;
3038 edge e;
3039 edge_iterator ei;
3040 auto_vec<edge> critical_edges;
3042 /* Loop is not well formed. */
3043 if (loop->inner)
3044 return false;
3046 body = get_loop_body (loop);
3047 for (i = 0; i < num; i++)
3049 bb = body[i];
3050 if (!aggressive_if_conv
3051 && phi_nodes (bb)
3052 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3054 if (dump_file && (dump_flags & TDF_DETAILS))
3055 fprintf (dump_file,
3056 "BB %d has complicated PHI with more than %u args.\n",
3057 bb->index, MAX_PHI_ARG_NUM);
3059 free (body);
3060 return false;
3062 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3063 continue;
3065 stmt = last_stmt (bb);
3066 /* Skip basic blocks not ending with conditional branch. */
3067 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3068 continue;
3070 FOR_EACH_EDGE (e, ei, bb->succs)
3071 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3072 critical_edges.safe_push (e);
3074 free (body);
3076 while (critical_edges.length () > 0)
3078 e = critical_edges.pop ();
3079 /* Don't split if bb can be predicated along non-critical edge. */
3080 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3081 split_edge (e);
3084 return true;
3087 /* Delete redundant statements produced by predication which prevents
3088 loop vectorization. */
3090 static void
3091 ifcvt_local_dce (class loop *loop)
3093 gimple *stmt;
3094 gimple *stmt1;
3095 gimple *phi;
3096 gimple_stmt_iterator gsi;
3097 auto_vec<gimple *> worklist;
3098 enum gimple_code code;
3099 use_operand_p use_p;
3100 imm_use_iterator imm_iter;
3102 /* The loop has a single BB only. */
3103 basic_block bb = loop->header;
3104 tree latch_vdef = NULL_TREE;
3106 worklist.create (64);
3107 /* Consider all phi as live statements. */
3108 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3110 phi = gsi_stmt (gsi);
3111 gimple_set_plf (phi, GF_PLF_2, true);
3112 worklist.safe_push (phi);
3113 if (virtual_operand_p (gimple_phi_result (phi)))
3114 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3116 /* Consider load/store statements, CALL and COND as live. */
3117 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3119 stmt = gsi_stmt (gsi);
3120 if (is_gimple_debug (stmt))
3122 gimple_set_plf (stmt, GF_PLF_2, true);
3123 continue;
3125 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3127 gimple_set_plf (stmt, GF_PLF_2, true);
3128 worklist.safe_push (stmt);
3129 continue;
3131 code = gimple_code (stmt);
3132 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3134 gimple_set_plf (stmt, GF_PLF_2, true);
3135 worklist.safe_push (stmt);
3136 continue;
3138 gimple_set_plf (stmt, GF_PLF_2, false);
3140 if (code == GIMPLE_ASSIGN)
3142 tree lhs = gimple_assign_lhs (stmt);
3143 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3145 stmt1 = USE_STMT (use_p);
3146 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3148 gimple_set_plf (stmt, GF_PLF_2, true);
3149 worklist.safe_push (stmt);
3150 break;
3155 /* Propagate liveness through arguments of live stmt. */
3156 while (worklist.length () > 0)
3158 ssa_op_iter iter;
3159 use_operand_p use_p;
3160 tree use;
3162 stmt = worklist.pop ();
3163 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3165 use = USE_FROM_PTR (use_p);
3166 if (TREE_CODE (use) != SSA_NAME)
3167 continue;
3168 stmt1 = SSA_NAME_DEF_STMT (use);
3169 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3170 continue;
3171 gimple_set_plf (stmt1, GF_PLF_2, true);
3172 worklist.safe_push (stmt1);
3175 /* Delete dead statements. */
3176 gsi = gsi_last_bb (bb);
3177 while (!gsi_end_p (gsi))
3179 gimple_stmt_iterator gsiprev = gsi;
3180 gsi_prev (&gsiprev);
3181 stmt = gsi_stmt (gsi);
3182 if (gimple_store_p (stmt) && gimple_vdef (stmt))
3184 tree lhs = gimple_get_lhs (stmt);
3185 ao_ref write;
3186 ao_ref_init (&write, lhs);
3188 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3189 == DSE_STORE_DEAD)
3190 delete_dead_or_redundant_assignment (&gsi, "dead");
3191 gsi = gsiprev;
3192 continue;
3195 if (gimple_plf (stmt, GF_PLF_2))
3197 gsi = gsiprev;
3198 continue;
3200 if (dump_file && (dump_flags & TDF_DETAILS))
3202 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3203 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3205 gsi_remove (&gsi, true);
3206 release_defs (stmt);
3207 gsi = gsiprev;
3211 /* Return true if VALUE is already available on edge PE. */
3213 static bool
3214 ifcvt_available_on_edge_p (edge pe, tree value)
3216 if (is_gimple_min_invariant (value))
3217 return true;
3219 if (TREE_CODE (value) == SSA_NAME)
3221 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3222 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3223 return true;
3226 return false;
3229 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3230 edge PE. */
3232 static bool
3233 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3235 if (auto *call = dyn_cast<gcall *> (stmt))
3237 if (gimple_call_internal_p (call)
3238 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3239 return false;
3241 else if (auto *assign = dyn_cast<gassign *> (stmt))
3243 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3244 return false;
3246 else
3247 return false;
3249 if (gimple_has_side_effects (stmt)
3250 || gimple_could_trap_p (stmt)
3251 || stmt_could_throw_p (cfun, stmt)
3252 || gimple_vdef (stmt)
3253 || gimple_vuse (stmt))
3254 return false;
3256 int num_args = gimple_num_args (stmt);
3257 if (pe != loop_preheader_edge (loop))
3259 for (int i = 0; i < num_args; ++i)
3260 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3261 return false;
3263 else
3265 for (int i = 0; i < num_args; ++i)
3266 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3267 return false;
3270 return true;
3273 /* Hoist invariant statements from LOOP to edge PE. */
3275 static void
3276 ifcvt_hoist_invariants (class loop *loop, edge pe)
3278 gimple_stmt_iterator hoist_gsi = {};
3279 unsigned int num_blocks = loop->num_nodes;
3280 basic_block *body = get_loop_body (loop);
3281 for (unsigned int i = 0; i < num_blocks; ++i)
3282 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3284 gimple *stmt = gsi_stmt (gsi);
3285 if (ifcvt_can_hoist (loop, pe, stmt))
3287 /* Once we've hoisted one statement, insert other statements
3288 after it. */
3289 gsi_remove (&gsi, false);
3290 if (hoist_gsi.ptr)
3291 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3292 else
3294 gsi_insert_on_edge_immediate (pe, stmt);
3295 hoist_gsi = gsi_for_stmt (stmt);
3297 continue;
3299 gsi_next (&gsi);
3301 free (body);
3304 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3305 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3306 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3307 if not NULL, will hold the tree representing the base struct of this
3308 bitfield. */
3310 static tree
3311 get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3312 tree *struct_expr)
3314 tree comp_ref = write ? gimple_assign_lhs (stmt)
3315 : gimple_assign_rhs1 (stmt);
3317 tree field_decl = TREE_OPERAND (comp_ref, 1);
3318 tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3320 /* Bail out if the representative is BLKmode as we will not be able to
3321 vectorize this. */
3322 if (TYPE_MODE (TREE_TYPE (rep_decl)) == E_BLKmode)
3323 return NULL_TREE;
3325 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3326 precision. */
3327 unsigned HOST_WIDE_INT bf_prec
3328 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3329 if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3330 return NULL_TREE;
3332 if (struct_expr)
3333 *struct_expr = TREE_OPERAND (comp_ref, 0);
3335 if (bitpos)
3337 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3338 where our bitfield starts in relation to the container REP_DECL. The
3339 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3340 us how many bytes from the start of the structure there are until the
3341 start of the group of bitfield members the FIELD_DECL belongs to,
3342 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3343 position our actual bitfield member starts. For the container
3344 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3345 us the distance between the start of the structure and the start of
3346 the container, though the first is in bytes and the later other in
3347 bits. With this in mind we calculate the bit position of our new
3348 BITFIELD_REF by subtracting the number of bits between the start of
3349 the structure and the container from the number of bits from the start
3350 of the structure and the actual bitfield member. */
3351 tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3352 DECL_FIELD_OFFSET (field_decl),
3353 build_int_cst (bitsizetype, BITS_PER_UNIT));
3354 bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3355 DECL_FIELD_BIT_OFFSET (field_decl));
3356 tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3357 DECL_FIELD_OFFSET (rep_decl),
3358 build_int_cst (bitsizetype, BITS_PER_UNIT));
3359 rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3360 DECL_FIELD_BIT_OFFSET (rep_decl));
3362 *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3365 return rep_decl;
3369 /* Lowers the bitfield described by DATA.
3370 For a write like:
3372 struct.bf = _1;
3374 lower to:
3376 __ifc_1 = struct.<representative>;
3377 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3378 struct.<representative> = __ifc_2;
3380 For a read:
3382 _1 = struct.bf;
3384 lower to:
3386 __ifc_1 = struct.<representative>;
3387 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3389 where representative is a legal load that contains the bitfield value,
3390 bitsize is the size of the bitfield and bitpos the offset to the start of
3391 the bitfield within the representative. */
3393 static void
3394 lower_bitfield (gassign *stmt, bool write)
3396 tree struct_expr;
3397 tree bitpos;
3398 tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
3399 tree rep_type = TREE_TYPE (rep_decl);
3400 tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
3402 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3403 if (dump_file && (dump_flags & TDF_DETAILS))
3405 fprintf (dump_file, "Lowering:\n");
3406 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3407 fprintf (dump_file, "to:\n");
3410 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3411 defining SSA_NAME. */
3412 tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
3413 NULL_TREE);
3414 tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3417 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3419 if (write)
3421 new_val = ifc_temp_var (rep_type,
3422 build3 (BIT_INSERT_EXPR, rep_type, new_val,
3423 unshare_expr (gimple_assign_rhs1 (stmt)),
3424 bitpos), &gsi);
3426 if (dump_file && (dump_flags & TDF_DETAILS))
3427 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3429 gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
3430 new_val);
3431 gimple_move_vops (new_stmt, stmt);
3432 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3434 if (dump_file && (dump_flags & TDF_DETAILS))
3435 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3437 else
3439 tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
3440 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
3441 bitpos);
3442 new_val = ifc_temp_var (bf_type, bfr, &gsi);
3444 gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
3445 new_val);
3446 gimple_move_vops (new_stmt, stmt);
3447 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3450 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3453 gsi_remove (&gsi, true);
3456 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3457 with data structures representing these bitfields. */
3459 static bool
3460 bitfields_to_lower_p (class loop *loop,
3461 vec <gassign *> &reads_to_lower,
3462 vec <gassign *> &writes_to_lower)
3464 gimple_stmt_iterator gsi;
3466 if (dump_file && (dump_flags & TDF_DETAILS))
3468 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3471 for (unsigned i = 0; i < loop->num_nodes; ++i)
3473 basic_block bb = ifc_bbs[i];
3474 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3476 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3477 if (!stmt)
3478 continue;
3480 tree op = gimple_assign_lhs (stmt);
3481 bool write = TREE_CODE (op) == COMPONENT_REF;
3483 if (!write)
3484 op = gimple_assign_rhs1 (stmt);
3486 if (TREE_CODE (op) != COMPONENT_REF)
3487 continue;
3489 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3491 if (dump_file && (dump_flags & TDF_DETAILS))
3492 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3494 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3496 if (dump_file && (dump_flags & TDF_DETAILS))
3497 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3498 " field type is not Integral.\n");
3499 return false;
3502 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3504 if (dump_file && (dump_flags & TDF_DETAILS))
3505 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3506 " representative is BLKmode.\n");
3507 return false;
3510 if (dump_file && (dump_flags & TDF_DETAILS))
3511 fprintf (dump_file, "\tBitfield OK to lower.\n");
3512 if (write)
3513 writes_to_lower.safe_push (stmt);
3514 else
3515 reads_to_lower.safe_push (stmt);
3519 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
3523 /* If-convert LOOP when it is legal. For the moment this pass has no
3524 profitability analysis. Returns non-zero todo flags when something
3525 changed. */
3527 unsigned int
3528 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3530 unsigned int todo = 0;
3531 bool aggressive_if_conv;
3532 class loop *rloop;
3533 auto_vec <gassign *, 4> reads_to_lower;
3534 auto_vec <gassign *, 4> writes_to_lower;
3535 bitmap exit_bbs;
3536 edge pe;
3537 auto_vec<data_reference_p, 10> refs;
3539 again:
3540 rloop = NULL;
3541 ifc_bbs = NULL;
3542 need_to_lower_bitfields = false;
3543 need_to_ifcvt = false;
3544 need_to_predicate = false;
3545 need_to_rewrite_undefined = false;
3546 any_complicated_phi = false;
3548 /* Apply more aggressive if-conversion when loop or its outer loop were
3549 marked with simd pragma. When that's the case, we try to if-convert
3550 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3551 aggressive_if_conv = loop->force_vectorize;
3552 if (!aggressive_if_conv)
3554 class loop *outer_loop = loop_outer (loop);
3555 if (outer_loop && outer_loop->force_vectorize)
3556 aggressive_if_conv = true;
3559 if (!single_exit (loop))
3560 goto cleanup;
3562 /* If there are more than two BBs in the loop then there is at least one if
3563 to convert. */
3564 if (loop->num_nodes > 2
3565 && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
3566 goto cleanup;
3568 ifc_bbs = get_loop_body_in_if_conv_order (loop);
3569 if (!ifc_bbs)
3571 if (dump_file && (dump_flags & TDF_DETAILS))
3572 fprintf (dump_file, "Irreducible loop\n");
3573 goto cleanup;
3576 if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
3577 goto cleanup;
3579 if (loop->num_nodes > 2)
3581 need_to_ifcvt = true;
3583 if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
3584 goto cleanup;
3586 if ((need_to_predicate || any_complicated_phi)
3587 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3588 || loop->dont_vectorize))
3589 goto cleanup;
3592 if ((flag_tree_loop_vectorize || loop->force_vectorize)
3593 && !loop->dont_vectorize)
3594 need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
3595 writes_to_lower);
3597 if (!need_to_ifcvt && !need_to_lower_bitfields)
3598 goto cleanup;
3600 /* The edge to insert invariant stmts on. */
3601 pe = loop_preheader_edge (loop);
3603 /* Since we have no cost model, always version loops unless the user
3604 specified -ftree-loop-if-convert or unless versioning is required.
3605 Either version this loop, or if the pattern is right for outer-loop
3606 vectorization, version the outer loop. In the latter case we will
3607 still if-convert the original inner loop. */
3608 if (need_to_lower_bitfields
3609 || need_to_predicate
3610 || any_complicated_phi
3611 || flag_tree_loop_if_convert != 1)
3613 class loop *vloop
3614 = (versionable_outer_loop_p (loop_outer (loop))
3615 ? loop_outer (loop) : loop);
3616 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3617 if (nloop == NULL)
3618 goto cleanup;
3619 if (vloop != loop)
3621 /* If versionable_outer_loop_p decided to version the
3622 outer loop, version also the inner loop of the non-vectorized
3623 loop copy. So we transform:
3624 loop1
3625 loop2
3626 into:
3627 if (LOOP_VECTORIZED (1, 3))
3629 loop1
3630 loop2
3632 else
3633 loop3 (copy of loop1)
3634 if (LOOP_VECTORIZED (4, 5))
3635 loop4 (copy of loop2)
3636 else
3637 loop5 (copy of loop4) */
3638 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3639 rloop = nloop->inner;
3641 else
3642 /* If we versioned loop then make sure to insert invariant
3643 stmts before the .LOOP_VECTORIZED check since the vectorizer
3644 will re-use that for things like runtime alias versioning
3645 whose condition can end up using those invariants. */
3646 pe = single_pred_edge (gimple_bb (preds->last ()));
3649 if (need_to_lower_bitfields)
3651 if (dump_file && (dump_flags & TDF_DETAILS))
3653 fprintf (dump_file, "-------------------------\n");
3654 fprintf (dump_file, "Start lowering bitfields\n");
3656 while (!reads_to_lower.is_empty ())
3657 lower_bitfield (reads_to_lower.pop (), false);
3658 while (!writes_to_lower.is_empty ())
3659 lower_bitfield (writes_to_lower.pop (), true);
3661 if (dump_file && (dump_flags & TDF_DETAILS))
3663 fprintf (dump_file, "Done lowering bitfields\n");
3664 fprintf (dump_file, "-------------------------\n");
3667 if (need_to_ifcvt)
3669 /* Now all statements are if-convertible. Combine all the basic
3670 blocks into one huge basic block doing the if-conversion
3671 on-the-fly. */
3672 combine_blocks (loop);
3675 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3676 and stores are involved. CSE only the loop body, not the entry
3677 PHIs, those are to be kept in sync with the non-if-converted copy.
3678 ??? We'll still keep dead stores though. */
3679 exit_bbs = BITMAP_ALLOC (NULL);
3680 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3681 bitmap_set_bit (exit_bbs, loop->latch->index);
3683 std::pair <tree, tree> *name_pair;
3684 unsigned ssa_names_idx;
3685 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3686 replace_uses_by (name_pair->first, name_pair->second);
3687 redundant_ssa_names.release ();
3689 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3691 /* Delete dead predicate computations. */
3692 ifcvt_local_dce (loop);
3693 BITMAP_FREE (exit_bbs);
3695 ifcvt_hoist_invariants (loop, pe);
3697 todo |= TODO_cleanup_cfg;
3699 cleanup:
3700 data_reference_p dr;
3701 unsigned int i;
3702 for (i = 0; refs.iterate (i, &dr); i++)
3704 free (dr->aux);
3705 free_data_ref (dr);
3707 refs.truncate (0);
3709 if (ifc_bbs)
3711 unsigned int i;
3713 for (i = 0; i < loop->num_nodes; i++)
3714 free_bb_predicate (ifc_bbs[i]);
3716 free (ifc_bbs);
3717 ifc_bbs = NULL;
3719 if (rloop != NULL)
3721 loop = rloop;
3722 reads_to_lower.truncate (0);
3723 writes_to_lower.truncate (0);
3724 goto again;
3727 return todo;
3730 /* Tree if-conversion pass management. */
3732 namespace {
3734 const pass_data pass_data_if_conversion =
3736 GIMPLE_PASS, /* type */
3737 "ifcvt", /* name */
3738 OPTGROUP_NONE, /* optinfo_flags */
3739 TV_TREE_LOOP_IFCVT, /* tv_id */
3740 ( PROP_cfg | PROP_ssa ), /* properties_required */
3741 0, /* properties_provided */
3742 0, /* properties_destroyed */
3743 0, /* todo_flags_start */
3744 0, /* todo_flags_finish */
3747 class pass_if_conversion : public gimple_opt_pass
3749 public:
3750 pass_if_conversion (gcc::context *ctxt)
3751 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3754 /* opt_pass methods: */
3755 bool gate (function *) final override;
3756 unsigned int execute (function *) final override;
3758 }; // class pass_if_conversion
3760 bool
3761 pass_if_conversion::gate (function *fun)
3763 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3764 && flag_tree_loop_if_convert != 0)
3765 || flag_tree_loop_if_convert == 1);
3768 unsigned int
3769 pass_if_conversion::execute (function *fun)
3771 unsigned todo = 0;
3773 if (number_of_loops (fun) <= 1)
3774 return 0;
3776 auto_vec<gimple *> preds;
3777 for (auto loop : loops_list (cfun, 0))
3778 if (flag_tree_loop_if_convert == 1
3779 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3780 && !loop->dont_vectorize))
3781 todo |= tree_if_conversion (loop, &preds);
3783 if (todo)
3785 free_numbers_of_iterations_estimates (fun);
3786 scev_reset ();
3789 if (flag_checking)
3791 basic_block bb;
3792 FOR_EACH_BB_FN (bb, fun)
3793 gcc_assert (!bb->aux);
3796 /* Perform IL update now, it might elide some loops. */
3797 if (todo & TODO_cleanup_cfg)
3799 cleanup_tree_cfg ();
3800 if (need_ssa_update_p (fun))
3801 todo |= TODO_update_ssa;
3803 if (todo & TODO_update_ssa_any)
3804 update_ssa (todo & TODO_update_ssa_any);
3806 /* If if-conversion elided the loop fall back to the original one. */
3807 for (unsigned i = 0; i < preds.length (); ++i)
3809 gimple *g = preds[i];
3810 if (!gimple_bb (g))
3811 continue;
3812 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3813 unsigned orig_loop = tree_to_uhwi (gimple_call_arg (g, 1));
3814 if (!get_loop (fun, ifcvt_loop) || !get_loop (fun, orig_loop))
3816 if (dump_file)
3817 fprintf (dump_file, "If-converted loop vanished\n");
3818 fold_loop_internal_call (g, boolean_false_node);
3822 return 0;
3825 } // anon namespace
3827 gimple_opt_pass *
3828 make_pass_if_conversion (gcc::context *ctxt)
3830 return new pass_if_conversion (ctxt);