c++, tree: declare some basic functions inline
[official-gcc.git] / gcc / tree-if-conv.cc
bloba19450f533dab0c6c5f7ff628ba1937d791b66a8
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "expr.h"
95 #include "optabs-query.h"
96 #include "gimple-pretty-print.h"
97 #include "alias.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
107 #include "cfgloop.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
114 #include "dbgcnt.h"
115 #include "tree-hash-traits.h"
116 #include "varasm.h"
117 #include "builtins.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
125 #include "tree-eh.h"
126 #include "cgraph.h"
128 /* For lang_hooks.types.type_for_mode. */
129 #include "langhooks.h"
131 /* Only handle PHIs with no more arguments unless we are asked to by
132 simd pragma. */
133 #define MAX_PHI_ARG_NUM \
134 ((unsigned) param_max_tree_if_conversion_phi_args)
136 /* True if we've converted a statement that was only executed when some
137 condition C was true, and if for correctness we need to predicate the
138 statement to ensure that it is a no-op when C is false. See
139 predicate_statements for the kinds of predication we support. */
140 static bool need_to_predicate;
142 /* True if we have to rewrite stmts that may invoke undefined behavior
143 when a condition C was false so it doesn't if it is always executed.
144 See predicate_statements for the kinds of predication we support. */
145 static bool need_to_rewrite_undefined;
147 /* Indicate if there are any complicated PHIs that need to be handled in
148 if-conversion. Complicated PHI has more than two arguments and can't
149 be degenerated to two arguments PHI. See more information in comment
150 before phi_convertible_by_degenerating_args. */
151 static bool any_complicated_phi;
153 /* True if we have bitfield accesses we can lower. */
154 static bool need_to_lower_bitfields;
156 /* True if there is any ifcvting to be done. */
157 static bool need_to_ifcvt;
159 /* Hash for struct innermost_loop_behavior. It depends on the user to
160 free the memory. */
162 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
164 static inline hashval_t hash (const value_type &);
165 static inline bool equal (const value_type &,
166 const compare_type &);
169 inline hashval_t
170 innermost_loop_behavior_hash::hash (const value_type &e)
172 hashval_t hash;
174 hash = iterative_hash_expr (e->base_address, 0);
175 hash = iterative_hash_expr (e->offset, hash);
176 hash = iterative_hash_expr (e->init, hash);
177 return iterative_hash_expr (e->step, hash);
180 inline bool
181 innermost_loop_behavior_hash::equal (const value_type &e1,
182 const compare_type &e2)
184 if ((e1->base_address && !e2->base_address)
185 || (!e1->base_address && e2->base_address)
186 || (!e1->offset && e2->offset)
187 || (e1->offset && !e2->offset)
188 || (!e1->init && e2->init)
189 || (e1->init && !e2->init)
190 || (!e1->step && e2->step)
191 || (e1->step && !e2->step))
192 return false;
194 if (e1->base_address && e2->base_address
195 && !operand_equal_p (e1->base_address, e2->base_address, 0))
196 return false;
197 if (e1->offset && e2->offset
198 && !operand_equal_p (e1->offset, e2->offset, 0))
199 return false;
200 if (e1->init && e2->init
201 && !operand_equal_p (e1->init, e2->init, 0))
202 return false;
203 if (e1->step && e2->step
204 && !operand_equal_p (e1->step, e2->step, 0))
205 return false;
207 return true;
210 /* List of basic blocks in if-conversion-suitable order. */
211 static basic_block *ifc_bbs;
213 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
214 static hash_map<innermost_loop_behavior_hash,
215 data_reference_p> *innermost_DR_map;
217 /* Hash table to store <base reference, DR> pairs. */
218 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
220 /* List of redundant SSA names: the first should be replaced by the second. */
221 static vec< std::pair<tree, tree> > redundant_ssa_names;
223 /* Structure used to predicate basic blocks. This is attached to the
224 ->aux field of the BBs in the loop to be if-converted. */
225 struct bb_predicate {
227 /* The condition under which this basic block is executed. */
228 tree predicate;
230 /* PREDICATE is gimplified, and the sequence of statements is
231 recorded here, in order to avoid the duplication of computations
232 that occur in previous conditions. See PR44483. */
233 gimple_seq predicate_gimplified_stmts;
236 /* Returns true when the basic block BB has a predicate. */
238 static inline bool
239 bb_has_predicate (basic_block bb)
241 return bb->aux != NULL;
244 /* Returns the gimplified predicate for basic block BB. */
246 static inline tree
247 bb_predicate (basic_block bb)
249 return ((struct bb_predicate *) bb->aux)->predicate;
252 /* Sets the gimplified predicate COND for basic block BB. */
254 static inline void
255 set_bb_predicate (basic_block bb, tree cond)
257 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
258 && is_gimple_val (TREE_OPERAND (cond, 0)))
259 || is_gimple_val (cond));
260 ((struct bb_predicate *) bb->aux)->predicate = cond;
263 /* Returns the sequence of statements of the gimplification of the
264 predicate for basic block BB. */
266 static inline gimple_seq
267 bb_predicate_gimplified_stmts (basic_block bb)
269 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
272 /* Sets the sequence of statements STMTS of the gimplification of the
273 predicate for basic block BB. */
275 static inline void
276 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
278 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
281 /* Adds the sequence of statements STMTS to the sequence of statements
282 of the predicate for basic block BB. */
284 static inline void
285 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
287 /* We might have updated some stmts in STMTS via force_gimple_operand
288 calling fold_stmt and that producing multiple stmts. Delink immediate
289 uses so update_ssa after loop versioning doesn't get confused for
290 the not yet inserted predicates.
291 ??? This should go away once we reliably avoid updating stmts
292 not in any BB. */
293 for (gimple_stmt_iterator gsi = gsi_start (stmts);
294 !gsi_end_p (gsi); gsi_next (&gsi))
296 gimple *stmt = gsi_stmt (gsi);
297 delink_stmt_imm_use (stmt);
298 gimple_set_modified (stmt, true);
300 gimple_seq_add_seq_without_update
301 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
304 /* Initializes to TRUE the predicate of basic block BB. */
306 static inline void
307 init_bb_predicate (basic_block bb)
309 bb->aux = XNEW (struct bb_predicate);
310 set_bb_predicate_gimplified_stmts (bb, NULL);
311 set_bb_predicate (bb, boolean_true_node);
314 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
316 static inline void
317 release_bb_predicate (basic_block bb)
319 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
320 if (stmts)
322 /* Ensure that these stmts haven't yet been added to a bb. */
323 if (flag_checking)
324 for (gimple_stmt_iterator i = gsi_start (stmts);
325 !gsi_end_p (i); gsi_next (&i))
326 gcc_assert (! gimple_bb (gsi_stmt (i)));
328 /* Discard them. */
329 gimple_seq_discard (stmts);
330 set_bb_predicate_gimplified_stmts (bb, NULL);
334 /* Free the predicate of basic block BB. */
336 static inline void
337 free_bb_predicate (basic_block bb)
339 if (!bb_has_predicate (bb))
340 return;
342 release_bb_predicate (bb);
343 free (bb->aux);
344 bb->aux = NULL;
347 /* Reinitialize predicate of BB with the true predicate. */
349 static inline void
350 reset_bb_predicate (basic_block bb)
352 if (!bb_has_predicate (bb))
353 init_bb_predicate (bb);
354 else
356 release_bb_predicate (bb);
357 set_bb_predicate (bb, boolean_true_node);
361 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
362 the expression EXPR. Inserts the statement created for this
363 computation before GSI and leaves the iterator GSI at the same
364 statement. */
366 static tree
367 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
369 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
370 gimple *stmt = gimple_build_assign (new_name, expr);
371 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
372 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
373 return new_name;
376 /* Return true when COND is a false predicate. */
378 static inline bool
379 is_false_predicate (tree cond)
381 return (cond != NULL_TREE
382 && (cond == boolean_false_node
383 || integer_zerop (cond)));
386 /* Return true when COND is a true predicate. */
388 static inline bool
389 is_true_predicate (tree cond)
391 return (cond == NULL_TREE
392 || cond == boolean_true_node
393 || integer_onep (cond));
396 /* Returns true when BB has a predicate that is not trivial: true or
397 NULL_TREE. */
399 static inline bool
400 is_predicated (basic_block bb)
402 return !is_true_predicate (bb_predicate (bb));
405 /* Parses the predicate COND and returns its comparison code and
406 operands OP0 and OP1. */
408 static enum tree_code
409 parse_predicate (tree cond, tree *op0, tree *op1)
411 gimple *s;
413 if (TREE_CODE (cond) == SSA_NAME
414 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
416 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
418 *op0 = gimple_assign_rhs1 (s);
419 *op1 = gimple_assign_rhs2 (s);
420 return gimple_assign_rhs_code (s);
423 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
425 tree op = gimple_assign_rhs1 (s);
426 tree type = TREE_TYPE (op);
427 enum tree_code code = parse_predicate (op, op0, op1);
429 return code == ERROR_MARK ? ERROR_MARK
430 : invert_tree_comparison (code, HONOR_NANS (type));
433 return ERROR_MARK;
436 if (COMPARISON_CLASS_P (cond))
438 *op0 = TREE_OPERAND (cond, 0);
439 *op1 = TREE_OPERAND (cond, 1);
440 return TREE_CODE (cond);
443 return ERROR_MARK;
446 /* Returns the fold of predicate C1 OR C2 at location LOC. */
448 static tree
449 fold_or_predicates (location_t loc, tree c1, tree c2)
451 tree op1a, op1b, op2a, op2b;
452 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
453 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
455 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
457 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
458 code2, op2a, op2b);
459 if (t)
460 return t;
463 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
466 /* Returns either a COND_EXPR or the folded expression if the folded
467 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
468 a constant or a SSA_NAME. */
470 static tree
471 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
473 /* If COND is comparison r != 0 and r has boolean type, convert COND
474 to SSA_NAME to accept by vect bool pattern. */
475 if (TREE_CODE (cond) == NE_EXPR)
477 tree op0 = TREE_OPERAND (cond, 0);
478 tree op1 = TREE_OPERAND (cond, 1);
479 if (TREE_CODE (op0) == SSA_NAME
480 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
481 && (integer_zerop (op1)))
482 cond = op0;
485 gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
486 type, cond, rhs, lhs);
487 if (cexpr.resimplify (NULL, follow_all_ssa_edges))
489 if (gimple_simplified_result_is_gimple_val (&cexpr))
490 return cexpr.ops[0];
491 else if (cexpr.code == ABS_EXPR)
492 return build1 (ABS_EXPR, type, cexpr.ops[0]);
493 else if (cexpr.code == MIN_EXPR
494 || cexpr.code == MAX_EXPR)
495 return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
498 return build3 (COND_EXPR, type, cond, rhs, lhs);
501 /* Add condition NC to the predicate list of basic block BB. LOOP is
502 the loop to be if-converted. Use predicate of cd-equivalent block
503 for join bb if it exists: we call basic blocks bb1 and bb2
504 cd-equivalent if they are executed under the same condition. */
506 static inline void
507 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
509 tree bc, *tp;
510 basic_block dom_bb;
512 if (is_true_predicate (nc))
513 return;
515 /* If dominance tells us this basic block is always executed,
516 don't record any predicates for it. */
517 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
518 return;
520 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
521 /* We use notion of cd equivalence to get simpler predicate for
522 join block, e.g. if join block has 2 predecessors with predicates
523 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
524 p1 & p2 | p1 & !p2. */
525 if (dom_bb != loop->header
526 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
528 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
529 bc = bb_predicate (dom_bb);
530 if (!is_true_predicate (bc))
531 set_bb_predicate (bb, bc);
532 else
533 gcc_assert (is_true_predicate (bb_predicate (bb)));
534 if (dump_file && (dump_flags & TDF_DETAILS))
535 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
536 dom_bb->index, bb->index);
537 return;
540 if (!is_predicated (bb))
541 bc = nc;
542 else
544 bc = bb_predicate (bb);
545 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
546 if (is_true_predicate (bc))
548 reset_bb_predicate (bb);
549 return;
553 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
554 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
555 tp = &TREE_OPERAND (bc, 0);
556 else
557 tp = &bc;
558 if (!is_gimple_val (*tp))
560 gimple_seq stmts;
561 *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
562 add_bb_predicate_gimplified_stmts (bb, stmts);
564 set_bb_predicate (bb, bc);
567 /* Add the condition COND to the previous condition PREV_COND, and add
568 this to the predicate list of the destination of edge E. LOOP is
569 the loop to be if-converted. */
571 static void
572 add_to_dst_predicate_list (class loop *loop, edge e,
573 tree prev_cond, tree cond)
575 if (!flow_bb_inside_loop_p (loop, e->dest))
576 return;
578 if (!is_true_predicate (prev_cond))
579 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
580 prev_cond, cond);
582 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
583 add_to_predicate_list (loop, e->dest, cond);
586 /* Return true if one of the successor edges of BB exits LOOP. */
588 static bool
589 bb_with_exit_edge_p (class loop *loop, basic_block bb)
591 edge e;
592 edge_iterator ei;
594 FOR_EACH_EDGE (e, ei, bb->succs)
595 if (loop_exit_edge_p (loop, e))
596 return true;
598 return false;
601 /* Given PHI which has more than two arguments, this function checks if
602 it's if-convertible by degenerating its arguments. Specifically, if
603 below two conditions are satisfied:
605 1) Number of PHI arguments with different values equals to 2 and one
606 argument has the only occurrence.
607 2) The edge corresponding to the unique argument isn't critical edge.
609 Such PHI can be handled as PHIs have only two arguments. For example,
610 below PHI:
612 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
614 can be transformed into:
616 res = (predicate of e3) ? A_2 : A_1;
618 Return TRUE if it is the case, FALSE otherwise. */
620 static bool
621 phi_convertible_by_degenerating_args (gphi *phi)
623 edge e;
624 tree arg, t1 = NULL, t2 = NULL;
625 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
626 unsigned int num_args = gimple_phi_num_args (phi);
628 gcc_assert (num_args > 2);
630 for (i = 0; i < num_args; i++)
632 arg = gimple_phi_arg_def (phi, i);
633 if (t1 == NULL || operand_equal_p (t1, arg, 0))
635 n1++;
636 i1 = i;
637 t1 = arg;
639 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
641 n2++;
642 i2 = i;
643 t2 = arg;
645 else
646 return false;
649 if (n1 != 1 && n2 != 1)
650 return false;
652 /* Check if the edge corresponding to the unique arg is critical. */
653 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
654 if (EDGE_COUNT (e->src->succs) > 1)
655 return false;
657 return true;
660 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
661 and it belongs to basic block BB. Note at this point, it is sure
662 that PHI is if-convertible. This function updates global variable
663 ANY_COMPLICATED_PHI if PHI is complicated. */
665 static bool
666 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
668 if (dump_file && (dump_flags & TDF_DETAILS))
670 fprintf (dump_file, "-------------------------\n");
671 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
674 if (bb != loop->header
675 && gimple_phi_num_args (phi) > 2
676 && !phi_convertible_by_degenerating_args (phi))
677 any_complicated_phi = true;
679 return true;
682 /* Records the status of a data reference. This struct is attached to
683 each DR->aux field. */
685 struct ifc_dr {
686 bool rw_unconditionally;
687 bool w_unconditionally;
688 bool written_at_least_once;
690 tree rw_predicate;
691 tree w_predicate;
692 tree base_w_predicate;
695 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
696 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
697 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
698 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
700 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
701 HASH tables. While storing them in HASH table, it checks if the
702 reference is unconditionally read or written and stores that as a flag
703 information. For base reference it checks if it is written atlest once
704 unconditionally and stores it as flag information along with DR.
705 In other words for every data reference A in STMT there exist other
706 accesses to a data reference with the same base with predicates that
707 add up (OR-up) to the true predicate: this ensures that the data
708 reference A is touched (read or written) on every iteration of the
709 if-converted loop. */
710 static void
711 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
714 data_reference_p *master_dr, *base_master_dr;
715 tree base_ref = DR_BASE_OBJECT (a);
716 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
717 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
718 bool exist1, exist2;
720 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
721 if (!exist1)
722 *master_dr = a;
724 if (DR_IS_WRITE (a))
726 IFC_DR (*master_dr)->w_predicate
727 = fold_or_predicates (UNKNOWN_LOCATION, ca,
728 IFC_DR (*master_dr)->w_predicate);
729 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
730 DR_W_UNCONDITIONALLY (*master_dr) = true;
732 IFC_DR (*master_dr)->rw_predicate
733 = fold_or_predicates (UNKNOWN_LOCATION, ca,
734 IFC_DR (*master_dr)->rw_predicate);
735 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
736 DR_RW_UNCONDITIONALLY (*master_dr) = true;
738 if (DR_IS_WRITE (a))
740 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
741 if (!exist2)
742 *base_master_dr = a;
743 IFC_DR (*base_master_dr)->base_w_predicate
744 = fold_or_predicates (UNKNOWN_LOCATION, ca,
745 IFC_DR (*base_master_dr)->base_w_predicate);
746 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
747 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
751 /* Return TRUE if can prove the index IDX of an array reference REF is
752 within array bound. Return false otherwise. */
754 static bool
755 idx_within_array_bound (tree ref, tree *idx, void *dta)
757 wi::overflow_type overflow;
758 widest_int niter, valid_niter, delta, wi_step;
759 tree ev, init, step;
760 tree low, high;
761 class loop *loop = (class loop*) dta;
763 /* Only support within-bound access for array references. */
764 if (TREE_CODE (ref) != ARRAY_REF)
765 return false;
767 /* For arrays that might have flexible sizes, it is not guaranteed that they
768 do not extend over their declared size. */
769 if (array_ref_flexible_size_p (ref))
770 return false;
772 ev = analyze_scalar_evolution (loop, *idx);
773 ev = instantiate_parameters (loop, ev);
774 init = initial_condition (ev);
775 step = evolution_part_in_loop_num (ev, loop->num);
777 if (!init || TREE_CODE (init) != INTEGER_CST
778 || (step && TREE_CODE (step) != INTEGER_CST))
779 return false;
781 low = array_ref_low_bound (ref);
782 high = array_ref_up_bound (ref);
784 /* The case of nonconstant bounds could be handled, but it would be
785 complicated. */
786 if (TREE_CODE (low) != INTEGER_CST
787 || !high || TREE_CODE (high) != INTEGER_CST)
788 return false;
790 /* Check if the intial idx is within bound. */
791 if (wi::to_widest (init) < wi::to_widest (low)
792 || wi::to_widest (init) > wi::to_widest (high))
793 return false;
795 /* The idx is always within bound. */
796 if (!step || integer_zerop (step))
797 return true;
799 if (!max_loop_iterations (loop, &niter))
800 return false;
802 if (wi::to_widest (step) < 0)
804 delta = wi::to_widest (init) - wi::to_widest (low);
805 wi_step = -wi::to_widest (step);
807 else
809 delta = wi::to_widest (high) - wi::to_widest (init);
810 wi_step = wi::to_widest (step);
813 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
814 /* The iteration space of idx is within array bound. */
815 if (!overflow && niter <= valid_niter)
816 return true;
818 return false;
821 /* Return TRUE if ref is a within bound array reference. */
823 static bool
824 ref_within_array_bound (gimple *stmt, tree ref)
826 class loop *loop = loop_containing_stmt (stmt);
828 gcc_assert (loop != NULL);
829 return for_each_index (&ref, idx_within_array_bound, loop);
833 /* Given a memory reference expression T, return TRUE if base object
834 it refers to is writable. The base object of a memory reference
835 is the main object being referenced, which is returned by function
836 get_base_address. */
838 static bool
839 base_object_writable (tree ref)
841 tree base_tree = get_base_address (ref);
843 return (base_tree
844 && DECL_P (base_tree)
845 && decl_binds_to_current_def_p (base_tree)
846 && !TREE_READONLY (base_tree));
849 /* Return true when the memory references of STMT won't trap in the
850 if-converted code. There are two things that we have to check for:
852 - writes to memory occur to writable memory: if-conversion of
853 memory writes transforms the conditional memory writes into
854 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
855 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
856 be executed at all in the original code, it may be a readonly
857 memory. To check that A is not const-qualified, we check that
858 there exists at least an unconditional write to A in the current
859 function.
861 - reads or writes to memory are valid memory accesses for every
862 iteration. To check that the memory accesses are correctly formed
863 and that we are allowed to read and write in these locations, we
864 check that the memory accesses to be if-converted occur at every
865 iteration unconditionally.
867 Returns true for the memory reference in STMT, same memory reference
868 is read or written unconditionally atleast once and the base memory
869 reference is written unconditionally once. This is to check reference
870 will not write fault. Also retuns true if the memory reference is
871 unconditionally read once then we are conditionally writing to memory
872 which is defined as read and write and is bound to the definition
873 we are seeing. */
874 static bool
875 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
877 /* If DR didn't see a reference here we can't use it to tell
878 whether the ref traps or not. */
879 if (gimple_uid (stmt) == 0)
880 return false;
882 data_reference_p *master_dr, *base_master_dr;
883 data_reference_p a = drs[gimple_uid (stmt) - 1];
885 tree base = DR_BASE_OBJECT (a);
886 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
888 gcc_assert (DR_STMT (a) == stmt);
889 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
890 || DR_INIT (a) || DR_STEP (a));
892 master_dr = innermost_DR_map->get (innermost);
893 gcc_assert (master_dr != NULL);
895 base_master_dr = baseref_DR_map->get (base);
897 /* If a is unconditionally written to it doesn't trap. */
898 if (DR_W_UNCONDITIONALLY (*master_dr))
899 return true;
901 /* If a is unconditionally accessed then ...
903 Even a is conditional access, we can treat it as an unconditional
904 one if it's an array reference and all its index are within array
905 bound. */
906 if (DR_RW_UNCONDITIONALLY (*master_dr)
907 || ref_within_array_bound (stmt, DR_REF (a)))
909 /* an unconditional read won't trap. */
910 if (DR_IS_READ (a))
911 return true;
913 /* an unconditionaly write won't trap if the base is written
914 to unconditionally. */
915 if (base_master_dr
916 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
917 return flag_store_data_races;
918 /* or the base is known to be not readonly. */
919 else if (base_object_writable (DR_REF (a)))
920 return flag_store_data_races;
923 return false;
926 /* Return true if STMT could be converted into a masked load or store
927 (conditional load or store based on a mask computed from bb predicate). */
929 static bool
930 ifcvt_can_use_mask_load_store (gimple *stmt)
932 /* Check whether this is a load or store. */
933 tree lhs = gimple_assign_lhs (stmt);
934 bool is_load;
935 tree ref;
936 if (gimple_store_p (stmt))
938 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
939 return false;
940 is_load = false;
941 ref = lhs;
943 else if (gimple_assign_load_p (stmt))
945 is_load = true;
946 ref = gimple_assign_rhs1 (stmt);
948 else
949 return false;
951 if (may_be_nonaddressable_p (ref))
952 return false;
954 /* Mask should be integer mode of the same size as the load/store
955 mode. */
956 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
957 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
958 return false;
960 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
961 return true;
963 return false;
966 /* Return true if STMT could be converted from an operation that is
967 unconditional to one that is conditional on a bb predicate mask. */
969 static bool
970 ifcvt_can_predicate (gimple *stmt)
972 basic_block bb = gimple_bb (stmt);
974 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
975 || bb->loop_father->dont_vectorize
976 || gimple_has_volatile_ops (stmt))
977 return false;
979 if (gimple_assign_single_p (stmt))
980 return ifcvt_can_use_mask_load_store (stmt);
982 tree_code code = gimple_assign_rhs_code (stmt);
983 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
984 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
985 if (!types_compatible_p (lhs_type, rhs_type))
986 return false;
987 internal_fn cond_fn = get_conditional_internal_fn (code);
988 return (cond_fn != IFN_LAST
989 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
992 /* Return true when STMT is if-convertible.
994 GIMPLE_ASSIGN statement is not if-convertible if,
995 - it is not movable,
996 - it could trap,
997 - LHS is not var decl. */
999 static bool
1000 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1001 vec<data_reference_p> refs)
1003 tree lhs = gimple_assign_lhs (stmt);
1005 if (dump_file && (dump_flags & TDF_DETAILS))
1007 fprintf (dump_file, "-------------------------\n");
1008 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1011 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1012 return false;
1014 /* Some of these constrains might be too conservative. */
1015 if (stmt_ends_bb_p (stmt)
1016 || gimple_has_volatile_ops (stmt)
1017 || (TREE_CODE (lhs) == SSA_NAME
1018 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1019 || gimple_has_side_effects (stmt))
1021 if (dump_file && (dump_flags & TDF_DETAILS))
1022 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1023 return false;
1026 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1027 in between if_convertible_loop_p and combine_blocks
1028 we can perform loop versioning. */
1029 gimple_set_plf (stmt, GF_PLF_2, false);
1031 if ((! gimple_vuse (stmt)
1032 || gimple_could_trap_p_1 (stmt, false, false)
1033 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1034 && gimple_could_trap_p (stmt))
1036 if (ifcvt_can_predicate (stmt))
1038 gimple_set_plf (stmt, GF_PLF_2, true);
1039 need_to_predicate = true;
1040 return true;
1042 if (dump_file && (dump_flags & TDF_DETAILS))
1043 fprintf (dump_file, "tree could trap...\n");
1044 return false;
1046 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1047 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1048 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1049 && arith_code_with_undefined_signed_overflow
1050 (gimple_assign_rhs_code (stmt)))
1051 /* We have to rewrite stmts with undefined overflow. */
1052 need_to_rewrite_undefined = true;
1054 /* When if-converting stores force versioning, likewise if we
1055 ended up generating store data races. */
1056 if (gimple_vdef (stmt))
1057 need_to_predicate = true;
1059 return true;
1062 /* Return true when STMT is if-convertible.
1064 A statement is if-convertible if:
1065 - it is an if-convertible GIMPLE_ASSIGN,
1066 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1067 - it is builtins call,
1068 - it is a call to a function with a SIMD clone. */
1070 static bool
1071 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1073 switch (gimple_code (stmt))
1075 case GIMPLE_LABEL:
1076 case GIMPLE_DEBUG:
1077 case GIMPLE_COND:
1078 return true;
1080 case GIMPLE_ASSIGN:
1081 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1083 case GIMPLE_CALL:
1085 tree fndecl = gimple_call_fndecl (stmt);
1086 if (fndecl)
1088 /* We can vectorize some builtins and functions with SIMD
1089 "inbranch" clones. */
1090 int flags = gimple_call_flags (stmt);
1091 struct cgraph_node *node = cgraph_node::get (fndecl);
1092 if ((flags & ECF_CONST)
1093 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1094 && fndecl_built_in_p (fndecl))
1095 return true;
1096 if (node && node->simd_clones != NULL)
1097 /* Ensure that at least one clone can be "inbranch". */
1098 for (struct cgraph_node *n = node->simd_clones; n != NULL;
1099 n = n->simdclone->next_clone)
1100 if (n->simdclone->inbranch)
1102 gimple_set_plf (stmt, GF_PLF_2, true);
1103 need_to_predicate = true;
1104 return true;
1107 return false;
1110 default:
1111 /* Don't know what to do with 'em so don't do anything. */
1112 if (dump_file && (dump_flags & TDF_DETAILS))
1114 fprintf (dump_file, "don't know what to do\n");
1115 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1117 return false;
1121 /* Assumes that BB has more than 1 predecessors.
1122 Returns false if at least one successor is not on critical edge
1123 and true otherwise. */
1125 static inline bool
1126 all_preds_critical_p (basic_block bb)
1128 edge e;
1129 edge_iterator ei;
1131 FOR_EACH_EDGE (e, ei, bb->preds)
1132 if (EDGE_COUNT (e->src->succs) == 1)
1133 return false;
1134 return true;
1137 /* Return true when BB is if-convertible. This routine does not check
1138 basic block's statements and phis.
1140 A basic block is not if-convertible if:
1141 - it is non-empty and it is after the exit block (in BFS order),
1142 - it is after the exit block but before the latch,
1143 - its edges are not normal.
1145 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1146 inside LOOP. */
1148 static bool
1149 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1151 edge e;
1152 edge_iterator ei;
1154 if (dump_file && (dump_flags & TDF_DETAILS))
1155 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1157 if (EDGE_COUNT (bb->succs) > 2)
1158 return false;
1160 gimple *last = last_stmt (bb);
1161 if (gcall *call = safe_dyn_cast <gcall *> (last))
1162 if (gimple_call_ctrl_altering_p (call))
1163 return false;
1165 if (exit_bb)
1167 if (bb != loop->latch)
1169 if (dump_file && (dump_flags & TDF_DETAILS))
1170 fprintf (dump_file, "basic block after exit bb but before latch\n");
1171 return false;
1173 else if (!empty_block_p (bb))
1175 if (dump_file && (dump_flags & TDF_DETAILS))
1176 fprintf (dump_file, "non empty basic block after exit bb\n");
1177 return false;
1179 else if (bb == loop->latch
1180 && bb != exit_bb
1181 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1183 if (dump_file && (dump_flags & TDF_DETAILS))
1184 fprintf (dump_file, "latch is not dominated by exit_block\n");
1185 return false;
1189 /* Be less adventurous and handle only normal edges. */
1190 FOR_EACH_EDGE (e, ei, bb->succs)
1191 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1193 if (dump_file && (dump_flags & TDF_DETAILS))
1194 fprintf (dump_file, "Difficult to handle edges\n");
1195 return false;
1198 return true;
1201 /* Return true when all predecessor blocks of BB are visited. The
1202 VISITED bitmap keeps track of the visited blocks. */
1204 static bool
1205 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1207 edge e;
1208 edge_iterator ei;
1209 FOR_EACH_EDGE (e, ei, bb->preds)
1210 if (!bitmap_bit_p (*visited, e->src->index))
1211 return false;
1213 return true;
1216 /* Get body of a LOOP in suitable order for if-conversion. It is
1217 caller's responsibility to deallocate basic block list.
1218 If-conversion suitable order is, breadth first sort (BFS) order
1219 with an additional constraint: select a block only if all its
1220 predecessors are already selected. */
1222 static basic_block *
1223 get_loop_body_in_if_conv_order (const class loop *loop)
1225 basic_block *blocks, *blocks_in_bfs_order;
1226 basic_block bb;
1227 bitmap visited;
1228 unsigned int index = 0;
1229 unsigned int visited_count = 0;
1231 gcc_assert (loop->num_nodes);
1232 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1234 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1235 visited = BITMAP_ALLOC (NULL);
1237 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1239 index = 0;
1240 while (index < loop->num_nodes)
1242 bb = blocks_in_bfs_order [index];
1244 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1246 free (blocks_in_bfs_order);
1247 BITMAP_FREE (visited);
1248 free (blocks);
1249 return NULL;
1252 if (!bitmap_bit_p (visited, bb->index))
1254 if (pred_blocks_visited_p (bb, &visited)
1255 || bb == loop->header)
1257 /* This block is now visited. */
1258 bitmap_set_bit (visited, bb->index);
1259 blocks[visited_count++] = bb;
1263 index++;
1265 if (index == loop->num_nodes
1266 && visited_count != loop->num_nodes)
1267 /* Not done yet. */
1268 index = 0;
1270 free (blocks_in_bfs_order);
1271 BITMAP_FREE (visited);
1272 return blocks;
1275 /* Returns true when the analysis of the predicates for all the basic
1276 blocks in LOOP succeeded.
1278 predicate_bbs first allocates the predicates of the basic blocks.
1279 These fields are then initialized with the tree expressions
1280 representing the predicates under which a basic block is executed
1281 in the LOOP. As the loop->header is executed at each iteration, it
1282 has the "true" predicate. Other statements executed under a
1283 condition are predicated with that condition, for example
1285 | if (x)
1286 | S1;
1287 | else
1288 | S2;
1290 S1 will be predicated with "x", and
1291 S2 will be predicated with "!x". */
1293 static void
1294 predicate_bbs (loop_p loop)
1296 unsigned int i;
1298 for (i = 0; i < loop->num_nodes; i++)
1299 init_bb_predicate (ifc_bbs[i]);
1301 for (i = 0; i < loop->num_nodes; i++)
1303 basic_block bb = ifc_bbs[i];
1304 tree cond;
1305 gimple *stmt;
1307 /* The loop latch and loop exit block are always executed and
1308 have no extra conditions to be processed: skip them. */
1309 if (bb == loop->latch
1310 || bb_with_exit_edge_p (loop, bb))
1312 reset_bb_predicate (bb);
1313 continue;
1316 cond = bb_predicate (bb);
1317 stmt = last_stmt (bb);
1318 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1320 tree c2;
1321 edge true_edge, false_edge;
1322 location_t loc = gimple_location (stmt);
1323 tree c;
1324 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1325 conditions can remain unfolded because of multiple uses so
1326 try to re-fold here, especially to get precision changing
1327 conversions sorted out. Do not simply fold the stmt since
1328 this is analysis only. When conditions were embedded in
1329 COND_EXPRs those were folded separately before folding the
1330 COND_EXPR but as they are now outside we have to make sure
1331 to fold them. Do it here - another opportunity would be to
1332 fold predicates as they are inserted. */
1333 gimple_match_op cexpr (gimple_match_cond::UNCOND,
1334 gimple_cond_code (stmt),
1335 boolean_type_node,
1336 gimple_cond_lhs (stmt),
1337 gimple_cond_rhs (stmt));
1338 if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1339 && cexpr.code.is_tree_code ()
1340 && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1341 c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1342 cexpr.ops[0], cexpr.ops[1]);
1343 else
1344 c = build2_loc (loc, gimple_cond_code (stmt),
1345 boolean_type_node,
1346 gimple_cond_lhs (stmt),
1347 gimple_cond_rhs (stmt));
1349 /* Add new condition into destination's predicate list. */
1350 extract_true_false_edges_from_block (gimple_bb (stmt),
1351 &true_edge, &false_edge);
1353 /* If C is true, then TRUE_EDGE is taken. */
1354 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1355 unshare_expr (c));
1357 /* If C is false, then FALSE_EDGE is taken. */
1358 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1359 unshare_expr (c));
1360 add_to_dst_predicate_list (loop, false_edge,
1361 unshare_expr (cond), c2);
1363 cond = NULL_TREE;
1366 /* If current bb has only one successor, then consider it as an
1367 unconditional goto. */
1368 if (single_succ_p (bb))
1370 basic_block bb_n = single_succ (bb);
1372 /* The successor bb inherits the predicate of its
1373 predecessor. If there is no predicate in the predecessor
1374 bb, then consider the successor bb as always executed. */
1375 if (cond == NULL_TREE)
1376 cond = boolean_true_node;
1378 add_to_predicate_list (loop, bb_n, cond);
1382 /* The loop header is always executed. */
1383 reset_bb_predicate (loop->header);
1384 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1385 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1388 /* Build region by adding loop pre-header and post-header blocks. */
1390 static vec<basic_block>
1391 build_region (class loop *loop)
1393 vec<basic_block> region = vNULL;
1394 basic_block exit_bb = NULL;
1396 gcc_assert (ifc_bbs);
1397 /* The first element is loop pre-header. */
1398 region.safe_push (loop_preheader_edge (loop)->src);
1400 for (unsigned int i = 0; i < loop->num_nodes; i++)
1402 basic_block bb = ifc_bbs[i];
1403 region.safe_push (bb);
1404 /* Find loop postheader. */
1405 edge e;
1406 edge_iterator ei;
1407 FOR_EACH_EDGE (e, ei, bb->succs)
1408 if (loop_exit_edge_p (loop, e))
1410 exit_bb = e->dest;
1411 break;
1414 /* The last element is loop post-header. */
1415 gcc_assert (exit_bb);
1416 region.safe_push (exit_bb);
1417 return region;
1420 /* Return true when LOOP is if-convertible. This is a helper function
1421 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1422 in if_convertible_loop_p. */
1424 static bool
1425 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1427 unsigned int i;
1428 basic_block exit_bb = NULL;
1429 vec<basic_block> region;
1431 calculate_dominance_info (CDI_DOMINATORS);
1433 for (i = 0; i < loop->num_nodes; i++)
1435 basic_block bb = ifc_bbs[i];
1437 if (!if_convertible_bb_p (loop, bb, exit_bb))
1438 return false;
1440 if (bb_with_exit_edge_p (loop, bb))
1441 exit_bb = bb;
1444 for (i = 0; i < loop->num_nodes; i++)
1446 basic_block bb = ifc_bbs[i];
1447 gimple_stmt_iterator gsi;
1449 bool may_have_nonlocal_labels
1450 = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1451 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1452 switch (gimple_code (gsi_stmt (gsi)))
1454 case GIMPLE_LABEL:
1455 if (!may_have_nonlocal_labels)
1457 tree label
1458 = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1459 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1460 return false;
1462 /* Fallthru. */
1463 case GIMPLE_ASSIGN:
1464 case GIMPLE_CALL:
1465 case GIMPLE_DEBUG:
1466 case GIMPLE_COND:
1467 gimple_set_uid (gsi_stmt (gsi), 0);
1468 break;
1469 default:
1470 return false;
1474 data_reference_p dr;
1476 innermost_DR_map
1477 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1478 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1480 /* Compute post-dominator tree locally. */
1481 region = build_region (loop);
1482 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1484 predicate_bbs (loop);
1486 /* Free post-dominator tree since it is not used after predication. */
1487 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1488 region.release ();
1490 for (i = 0; refs->iterate (i, &dr); i++)
1492 tree ref = DR_REF (dr);
1494 dr->aux = XNEW (struct ifc_dr);
1495 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1496 DR_RW_UNCONDITIONALLY (dr) = false;
1497 DR_W_UNCONDITIONALLY (dr) = false;
1498 IFC_DR (dr)->rw_predicate = boolean_false_node;
1499 IFC_DR (dr)->w_predicate = boolean_false_node;
1500 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1501 if (gimple_uid (DR_STMT (dr)) == 0)
1502 gimple_set_uid (DR_STMT (dr), i + 1);
1504 /* If DR doesn't have innermost loop behavior or it's a compound
1505 memory reference, we synthesize its innermost loop behavior
1506 for hashing. */
1507 if (TREE_CODE (ref) == COMPONENT_REF
1508 || TREE_CODE (ref) == IMAGPART_EXPR
1509 || TREE_CODE (ref) == REALPART_EXPR
1510 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1511 || DR_INIT (dr) || DR_STEP (dr)))
1513 while (TREE_CODE (ref) == COMPONENT_REF
1514 || TREE_CODE (ref) == IMAGPART_EXPR
1515 || TREE_CODE (ref) == REALPART_EXPR)
1516 ref = TREE_OPERAND (ref, 0);
1518 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1519 DR_BASE_ADDRESS (dr) = ref;
1521 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1524 for (i = 0; i < loop->num_nodes; i++)
1526 basic_block bb = ifc_bbs[i];
1527 gimple_stmt_iterator itr;
1529 /* Check the if-convertibility of statements in predicated BBs. */
1530 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1531 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1532 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1533 return false;
1536 /* Checking PHIs needs to be done after stmts, as the fact whether there
1537 are any masked loads or stores affects the tests. */
1538 for (i = 0; i < loop->num_nodes; i++)
1540 basic_block bb = ifc_bbs[i];
1541 gphi_iterator itr;
1543 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1544 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1545 return false;
1548 if (dump_file)
1549 fprintf (dump_file, "Applying if-conversion\n");
1551 return true;
1554 /* Return true when LOOP is if-convertible.
1555 LOOP is if-convertible if:
1556 - it is innermost,
1557 - it has two or more basic blocks,
1558 - it has only one exit,
1559 - loop header is not the exit edge,
1560 - if its basic blocks and phi nodes are if convertible. */
1562 static bool
1563 if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1565 edge e;
1566 edge_iterator ei;
1567 bool res = false;
1569 /* Handle only innermost loop. */
1570 if (!loop || loop->inner)
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1573 fprintf (dump_file, "not innermost loop\n");
1574 return false;
1577 /* If only one block, no need for if-conversion. */
1578 if (loop->num_nodes <= 2)
1580 if (dump_file && (dump_flags & TDF_DETAILS))
1581 fprintf (dump_file, "less than 2 basic blocks\n");
1582 return false;
1585 /* More than one loop exit is too much to handle. */
1586 if (!single_exit (loop))
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1589 fprintf (dump_file, "multiple exits\n");
1590 return false;
1593 /* If one of the loop header's edge is an exit edge then do not
1594 apply if-conversion. */
1595 FOR_EACH_EDGE (e, ei, loop->header->succs)
1596 if (loop_exit_edge_p (loop, e))
1597 return false;
1599 res = if_convertible_loop_p_1 (loop, refs);
1601 delete innermost_DR_map;
1602 innermost_DR_map = NULL;
1604 delete baseref_DR_map;
1605 baseref_DR_map = NULL;
1607 return res;
1610 /* Return reduc_1 if has_nop.
1612 if (...)
1613 tmp1 = (unsigned type) reduc_1;
1614 tmp2 = tmp1 + rhs2;
1615 reduc_3 = (signed type) tmp2. */
1616 static tree
1617 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1619 if (!has_nop)
1620 return op;
1622 if (TREE_CODE (op) != SSA_NAME)
1623 return NULL_TREE;
1625 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1626 if (!stmt
1627 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1628 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1629 (gimple_assign_rhs1 (stmt))))
1630 return NULL_TREE;
1632 return gimple_assign_rhs1 (stmt);
1635 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1636 which is in predicated basic block.
1637 In fact, the following PHI pattern is searching:
1638 loop-header:
1639 reduc_1 = PHI <..., reduc_2>
1641 if (...)
1642 reduc_3 = ...
1643 reduc_2 = PHI <reduc_1, reduc_3>
1645 ARG_0 and ARG_1 are correspondent PHI arguments.
1646 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1647 EXTENDED is true if PHI has > 2 arguments. */
1649 static bool
1650 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1651 tree *op0, tree *op1, bool extended, bool* has_nop,
1652 gimple **nop_reduc)
1654 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1655 gimple *stmt;
1656 gimple *header_phi = NULL;
1657 enum tree_code reduction_op;
1658 basic_block bb = gimple_bb (phi);
1659 class loop *loop = bb->loop_father;
1660 edge latch_e = loop_latch_edge (loop);
1661 imm_use_iterator imm_iter;
1662 use_operand_p use_p;
1663 edge e;
1664 edge_iterator ei;
1665 bool result = *has_nop = false;
1666 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1667 return false;
1669 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1671 lhs = arg_1;
1672 header_phi = SSA_NAME_DEF_STMT (arg_0);
1673 stmt = SSA_NAME_DEF_STMT (arg_1);
1675 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1677 lhs = arg_0;
1678 header_phi = SSA_NAME_DEF_STMT (arg_1);
1679 stmt = SSA_NAME_DEF_STMT (arg_0);
1681 else
1682 return false;
1683 if (gimple_bb (header_phi) != loop->header)
1684 return false;
1686 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1687 return false;
1689 if (gimple_code (stmt) != GIMPLE_ASSIGN
1690 || gimple_has_volatile_ops (stmt))
1691 return false;
1693 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1694 return false;
1696 if (!is_predicated (gimple_bb (stmt)))
1697 return false;
1699 /* Check that stmt-block is predecessor of phi-block. */
1700 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1701 if (e->dest == bb)
1703 result = true;
1704 break;
1706 if (!result)
1707 return false;
1709 if (!has_single_use (lhs))
1710 return false;
1712 reduction_op = gimple_assign_rhs_code (stmt);
1714 /* Catch something like below
1716 loop-header:
1717 reduc_1 = PHI <..., reduc_2>
1719 if (...)
1720 tmp1 = (unsigned type) reduc_1;
1721 tmp2 = tmp1 + rhs2;
1722 reduc_3 = (signed type) tmp2;
1724 reduc_2 = PHI <reduc_1, reduc_3>
1726 and convert to
1728 reduc_2 = PHI <0, reduc_3>
1729 tmp1 = (unsigned type)reduce_1;
1730 ifcvt = cond_expr ? rhs2 : 0
1731 tmp2 = tmp1 +/- ifcvt;
1732 reduce_1 = (signed type)tmp2; */
1734 if (CONVERT_EXPR_CODE_P (reduction_op))
1736 lhs = gimple_assign_rhs1 (stmt);
1737 if (TREE_CODE (lhs) != SSA_NAME
1738 || !has_single_use (lhs))
1739 return false;
1741 *nop_reduc = stmt;
1742 stmt = SSA_NAME_DEF_STMT (lhs);
1743 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1744 || !is_gimple_assign (stmt))
1745 return false;
1747 *has_nop = true;
1748 reduction_op = gimple_assign_rhs_code (stmt);
1751 if (reduction_op != PLUS_EXPR
1752 && reduction_op != MINUS_EXPR
1753 && reduction_op != MULT_EXPR
1754 && reduction_op != BIT_IOR_EXPR
1755 && reduction_op != BIT_XOR_EXPR
1756 && reduction_op != BIT_AND_EXPR)
1757 return false;
1758 r_op1 = gimple_assign_rhs1 (stmt);
1759 r_op2 = gimple_assign_rhs2 (stmt);
1761 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1762 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1764 /* Make R_OP1 to hold reduction variable. */
1765 if (r_nop2 == PHI_RESULT (header_phi)
1766 && commutative_tree_code (reduction_op))
1768 std::swap (r_op1, r_op2);
1769 std::swap (r_nop1, r_nop2);
1771 else if (r_nop1 != PHI_RESULT (header_phi))
1772 return false;
1774 if (*has_nop)
1776 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1777 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1779 gimple *use_stmt = USE_STMT (use_p);
1780 if (is_gimple_debug (use_stmt))
1781 continue;
1782 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1783 continue;
1784 if (use_stmt != phi)
1785 return false;
1789 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1790 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1792 gimple *use_stmt = USE_STMT (use_p);
1793 if (is_gimple_debug (use_stmt))
1794 continue;
1795 if (use_stmt == stmt)
1796 continue;
1797 if (gimple_code (use_stmt) != GIMPLE_PHI)
1798 return false;
1801 *op0 = r_op1; *op1 = r_op2;
1802 *reduc = stmt;
1803 return true;
1806 /* Converts conditional scalar reduction into unconditional form, e.g.
1807 bb_4
1808 if (_5 != 0) goto bb_5 else goto bb_6
1809 end_bb_4
1810 bb_5
1811 res_6 = res_13 + 1;
1812 end_bb_5
1813 bb_6
1814 # res_2 = PHI <res_13(4), res_6(5)>
1815 end_bb_6
1817 will be converted into sequence
1818 _ifc__1 = _5 != 0 ? 1 : 0;
1819 res_2 = res_13 + _ifc__1;
1820 Argument SWAP tells that arguments of conditional expression should be
1821 swapped.
1822 Returns rhs of resulting PHI assignment. */
1824 static tree
1825 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1826 tree cond, tree op0, tree op1, bool swap,
1827 bool has_nop, gimple* nop_reduc)
1829 gimple_stmt_iterator stmt_it;
1830 gimple *new_assign;
1831 tree rhs;
1832 tree rhs1 = gimple_assign_rhs1 (reduc);
1833 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1834 tree c;
1835 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
1836 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1837 gimple_seq stmts = NULL;
1839 if (dump_file && (dump_flags & TDF_DETAILS))
1841 fprintf (dump_file, "Found cond scalar reduction.\n");
1842 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1845 /* Build cond expression using COND and constant operand
1846 of reduction rhs. */
1847 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1848 unshare_expr (cond),
1849 swap ? op_nochange : op1,
1850 swap ? op1 : op_nochange);
1852 /* Create assignment stmt and insert it at GSI. */
1853 new_assign = gimple_build_assign (tmp, c);
1854 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1855 /* Build rhs for unconditional increment/decrement/logic_operation. */
1856 rhs = gimple_build (&stmts, reduction_op,
1857 TREE_TYPE (rhs1), op0, tmp);
1859 if (has_nop)
1861 rhs = gimple_convert (&stmts,
1862 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1863 stmt_it = gsi_for_stmt (nop_reduc);
1864 gsi_remove (&stmt_it, true);
1865 release_defs (nop_reduc);
1867 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1869 /* Delete original reduction stmt. */
1870 stmt_it = gsi_for_stmt (reduc);
1871 gsi_remove (&stmt_it, true);
1872 release_defs (reduc);
1873 return rhs;
1876 /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
1877 as to whether the condition is inverted. */
1879 static tree
1880 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1881 gimple_stmt_iterator *gsi, bool *invert)
1883 int len;
1884 int i;
1885 tree cond = NULL_TREE;
1886 tree c;
1887 edge e;
1889 *invert = false;
1890 len = occur->length ();
1891 gcc_assert (len > 0);
1892 for (i = 0; i < len; i++)
1894 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1895 c = bb_predicate (e->src);
1896 if (is_true_predicate (c))
1898 cond = c;
1899 break;
1901 /* If we have just a single inverted predicate, signal that and
1902 instead invert the COND_EXPR arms. */
1903 if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
1905 c = TREE_OPERAND (c, 0);
1906 *invert = true;
1908 c = force_gimple_operand_gsi (gsi, unshare_expr (c),
1909 true, NULL_TREE, true, GSI_SAME_STMT);
1910 if (cond != NULL_TREE)
1912 /* Must build OR expression. */
1913 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1914 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1915 NULL_TREE, true, GSI_SAME_STMT);
1917 else
1918 cond = c;
1920 gcc_assert (cond != NULL_TREE);
1921 return cond;
1924 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1925 This routine can handle PHI nodes with more than two arguments.
1927 For example,
1928 S1: A = PHI <x1(1), x2(5)>
1929 is converted into,
1930 S2: A = cond ? x1 : x2;
1932 The generated code is inserted at GSI that points to the top of
1933 basic block's statement list.
1934 If PHI node has more than two arguments a chain of conditional
1935 expression is produced. */
1938 static void
1939 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1941 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1942 tree rhs, res, arg0, arg1, op0, op1, scev;
1943 tree cond;
1944 unsigned int index0;
1945 unsigned int max, args_len;
1946 edge e;
1947 basic_block bb;
1948 unsigned int i;
1949 bool has_nop;
1951 res = gimple_phi_result (phi);
1952 if (virtual_operand_p (res))
1953 return;
1955 if ((rhs = degenerate_phi_result (phi))
1956 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1957 res))
1958 && !chrec_contains_undetermined (scev)
1959 && scev != res
1960 && (rhs = gimple_phi_arg_def (phi, 0))))
1962 if (dump_file && (dump_flags & TDF_DETAILS))
1964 fprintf (dump_file, "Degenerate phi!\n");
1965 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1967 new_stmt = gimple_build_assign (res, rhs);
1968 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1969 update_stmt (new_stmt);
1970 return;
1973 bb = gimple_bb (phi);
1974 if (EDGE_COUNT (bb->preds) == 2)
1976 /* Predicate ordinary PHI node with 2 arguments. */
1977 edge first_edge, second_edge;
1978 basic_block true_bb;
1979 first_edge = EDGE_PRED (bb, 0);
1980 second_edge = EDGE_PRED (bb, 1);
1981 cond = bb_predicate (first_edge->src);
1982 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1983 std::swap (first_edge, second_edge);
1984 if (EDGE_COUNT (first_edge->src->succs) > 1)
1986 cond = bb_predicate (second_edge->src);
1987 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1988 cond = TREE_OPERAND (cond, 0);
1989 else
1990 first_edge = second_edge;
1992 else
1993 cond = bb_predicate (first_edge->src);
1994 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1995 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1996 NULL_TREE, true, GSI_SAME_STMT);
1997 true_bb = first_edge->src;
1998 if (EDGE_PRED (bb, 1)->src == true_bb)
2000 arg0 = gimple_phi_arg_def (phi, 1);
2001 arg1 = gimple_phi_arg_def (phi, 0);
2003 else
2005 arg0 = gimple_phi_arg_def (phi, 0);
2006 arg1 = gimple_phi_arg_def (phi, 1);
2008 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2009 &op0, &op1, false, &has_nop,
2010 &nop_reduc))
2012 /* Convert reduction stmt into vectorizable form. */
2013 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2014 true_bb != gimple_bb (reduc),
2015 has_nop, nop_reduc);
2016 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2018 else
2019 /* Build new RHS using selected condition and arguments. */
2020 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2021 arg0, arg1);
2022 new_stmt = gimple_build_assign (res, rhs);
2023 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2024 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2025 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2027 new_stmt = gsi_stmt (new_gsi);
2028 update_stmt (new_stmt);
2031 if (dump_file && (dump_flags & TDF_DETAILS))
2033 fprintf (dump_file, "new phi replacement stmt\n");
2034 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2036 return;
2039 /* Create hashmap for PHI node which contain vector of argument indexes
2040 having the same value. */
2041 bool swap = false;
2042 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2043 unsigned int num_args = gimple_phi_num_args (phi);
2044 int max_ind = -1;
2045 /* Vector of different PHI argument values. */
2046 auto_vec<tree> args (num_args);
2048 /* Compute phi_arg_map. */
2049 for (i = 0; i < num_args; i++)
2051 tree arg;
2053 arg = gimple_phi_arg_def (phi, i);
2054 if (!phi_arg_map.get (arg))
2055 args.quick_push (arg);
2056 phi_arg_map.get_or_insert (arg).safe_push (i);
2059 /* Determine element with max number of occurrences. */
2060 max_ind = -1;
2061 max = 1;
2062 args_len = args.length ();
2063 for (i = 0; i < args_len; i++)
2065 unsigned int len;
2066 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2068 max_ind = (int) i;
2069 max = len;
2073 /* Put element with max number of occurences to the end of ARGS. */
2074 if (max_ind != -1 && max_ind + 1 != (int) args_len)
2075 std::swap (args[args_len - 1], args[max_ind]);
2077 /* Handle one special case when number of arguments with different values
2078 is equal 2 and one argument has the only occurrence. Such PHI can be
2079 handled as if would have only 2 arguments. */
2080 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2082 vec<int> *indexes;
2083 indexes = phi_arg_map.get (args[0]);
2084 index0 = (*indexes)[0];
2085 arg0 = args[0];
2086 arg1 = args[1];
2087 e = gimple_phi_arg_edge (phi, index0);
2088 cond = bb_predicate (e->src);
2089 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2091 swap = true;
2092 cond = TREE_OPERAND (cond, 0);
2094 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2095 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2096 NULL_TREE, true, GSI_SAME_STMT);
2097 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2098 &op0, &op1, true, &has_nop, &nop_reduc)))
2099 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2100 swap? arg1 : arg0,
2101 swap? arg0 : arg1);
2102 else
2104 /* Convert reduction stmt into vectorizable form. */
2105 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2106 swap,has_nop, nop_reduc);
2107 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2109 new_stmt = gimple_build_assign (res, rhs);
2110 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2111 update_stmt (new_stmt);
2113 else
2115 /* Common case. */
2116 vec<int> *indexes;
2117 tree type = TREE_TYPE (gimple_phi_result (phi));
2118 tree lhs;
2119 arg1 = args[args_len - 1];
2120 for (i = args_len - 1; i > 0; i--)
2122 arg0 = args[i - 1];
2123 indexes = phi_arg_map.get (args[i - 1]);
2124 if (i != 1)
2125 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2126 else
2127 lhs = res;
2128 bool invert;
2129 cond = gen_phi_arg_condition (phi, indexes, gsi, &invert);
2130 if (invert)
2131 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2132 arg1, arg0);
2133 else
2134 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2135 arg0, arg1);
2136 new_stmt = gimple_build_assign (lhs, rhs);
2137 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2138 update_stmt (new_stmt);
2139 arg1 = lhs;
2143 if (dump_file && (dump_flags & TDF_DETAILS))
2145 fprintf (dump_file, "new extended phi replacement stmt\n");
2146 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2150 /* Replaces in LOOP all the scalar phi nodes other than those in the
2151 LOOP->header block with conditional modify expressions. */
2153 static void
2154 predicate_all_scalar_phis (class loop *loop)
2156 basic_block bb;
2157 unsigned int orig_loop_num_nodes = loop->num_nodes;
2158 unsigned int i;
2160 for (i = 1; i < orig_loop_num_nodes; i++)
2162 gphi *phi;
2163 gimple_stmt_iterator gsi;
2164 gphi_iterator phi_gsi;
2165 bb = ifc_bbs[i];
2167 if (bb == loop->header)
2168 continue;
2170 phi_gsi = gsi_start_phis (bb);
2171 if (gsi_end_p (phi_gsi))
2172 continue;
2174 gsi = gsi_after_labels (bb);
2175 while (!gsi_end_p (phi_gsi))
2177 phi = phi_gsi.phi ();
2178 if (virtual_operand_p (gimple_phi_result (phi)))
2179 gsi_next (&phi_gsi);
2180 else
2182 predicate_scalar_phi (phi, &gsi);
2183 remove_phi_node (&phi_gsi, false);
2189 /* Insert in each basic block of LOOP the statements produced by the
2190 gimplification of the predicates. */
2192 static void
2193 insert_gimplified_predicates (loop_p loop)
2195 unsigned int i;
2197 for (i = 0; i < loop->num_nodes; i++)
2199 basic_block bb = ifc_bbs[i];
2200 gimple_seq stmts;
2201 if (!is_predicated (bb))
2202 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2203 if (!is_predicated (bb))
2205 /* Do not insert statements for a basic block that is not
2206 predicated. Also make sure that the predicate of the
2207 basic block is set to true. */
2208 reset_bb_predicate (bb);
2209 continue;
2212 stmts = bb_predicate_gimplified_stmts (bb);
2213 if (stmts)
2215 if (need_to_predicate)
2217 /* Insert the predicate of the BB just after the label,
2218 as the if-conversion of memory writes will use this
2219 predicate. */
2220 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2221 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2223 else
2225 /* Insert the predicate of the BB at the end of the BB
2226 as this would reduce the register pressure: the only
2227 use of this predicate will be in successor BBs. */
2228 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2230 if (gsi_end_p (gsi)
2231 || stmt_ends_bb_p (gsi_stmt (gsi)))
2232 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2233 else
2234 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2237 /* Once the sequence is code generated, set it to NULL. */
2238 set_bb_predicate_gimplified_stmts (bb, NULL);
2243 /* Helper function for predicate_statements. Returns index of existent
2244 mask if it was created for given SIZE and -1 otherwise. */
2246 static int
2247 mask_exists (int size, const vec<int> &vec)
2249 unsigned int ix;
2250 int v;
2251 FOR_EACH_VEC_ELT (vec, ix, v)
2252 if (v == size)
2253 return (int) ix;
2254 return -1;
2257 /* Helper function for predicate_statements. STMT is a memory read or
2258 write and it needs to be predicated by MASK. Return a statement
2259 that does so. */
2261 static gimple *
2262 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2264 gcall *new_stmt;
2266 tree lhs = gimple_assign_lhs (stmt);
2267 tree rhs = gimple_assign_rhs1 (stmt);
2268 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2269 mark_addressable (ref);
2270 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2271 true, NULL_TREE, true, GSI_SAME_STMT);
2272 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2273 get_object_alignment (ref));
2274 /* Copy points-to info if possible. */
2275 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2276 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2277 ref);
2278 if (TREE_CODE (lhs) == SSA_NAME)
2280 new_stmt
2281 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2282 ptr, mask);
2283 gimple_call_set_lhs (new_stmt, lhs);
2284 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2286 else
2288 new_stmt
2289 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2290 mask, rhs);
2291 gimple_move_vops (new_stmt, stmt);
2293 gimple_call_set_nothrow (new_stmt, true);
2294 return new_stmt;
2297 /* STMT uses OP_LHS. Check whether it is equivalent to:
2299 ... = OP_MASK ? OP_LHS : X;
2301 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2302 known to have value OP_COND. */
2304 static tree
2305 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2306 tree op_lhs)
2308 gassign *assign = dyn_cast <gassign *> (stmt);
2309 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2310 return NULL_TREE;
2312 tree use_cond = gimple_assign_rhs1 (assign);
2313 tree if_true = gimple_assign_rhs2 (assign);
2314 tree if_false = gimple_assign_rhs3 (assign);
2316 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2317 && if_true == op_lhs)
2318 return if_false;
2320 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2321 return if_true;
2323 return NULL_TREE;
2326 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2327 the set of SSA names defined earlier in STMT's block. */
2329 static bool
2330 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2331 tree value)
2333 if (is_gimple_min_invariant (value))
2334 return true;
2336 if (TREE_CODE (value) == SSA_NAME)
2338 if (SSA_NAME_IS_DEFAULT_DEF (value))
2339 return true;
2341 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2342 basic_block use_bb = gimple_bb (stmt);
2343 return (def_bb == use_bb
2344 ? ssa_names->contains (value)
2345 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2348 return false;
2351 /* Helper function for predicate_statements. STMT is a potentially-trapping
2352 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2353 has value COND. Return a statement that does so. SSA_NAMES is the set of
2354 SSA names defined earlier in STMT's block. */
2356 static gimple *
2357 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2358 hash_set<tree_ssa_name_hash> *ssa_names)
2360 tree lhs = gimple_assign_lhs (stmt);
2361 tree_code code = gimple_assign_rhs_code (stmt);
2362 unsigned int nops = gimple_num_ops (stmt);
2363 internal_fn cond_fn = get_conditional_internal_fn (code);
2365 /* Construct the arguments to the conditional internal function. */
2366 auto_vec<tree, 8> args;
2367 args.safe_grow (nops + 1, true);
2368 args[0] = mask;
2369 for (unsigned int i = 1; i < nops; ++i)
2370 args[i] = gimple_op (stmt, i);
2371 args[nops] = NULL_TREE;
2373 /* Look for uses of the result to see whether they are COND_EXPRs that can
2374 be folded into the conditional call. */
2375 imm_use_iterator imm_iter;
2376 gimple *use_stmt;
2377 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2379 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2380 if (new_else && value_available_p (stmt, ssa_names, new_else))
2382 if (!args[nops])
2383 args[nops] = new_else;
2384 if (operand_equal_p (new_else, args[nops], 0))
2386 /* We have:
2388 LHS = IFN_COND (MASK, ..., ELSE);
2389 X = MASK ? LHS : ELSE;
2391 which makes X equivalent to LHS. */
2392 tree use_lhs = gimple_assign_lhs (use_stmt);
2393 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2397 if (!args[nops])
2398 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2399 nops - 1, &args[1]);
2401 /* Create and insert the call. */
2402 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2403 gimple_call_set_lhs (new_stmt, lhs);
2404 gimple_call_set_nothrow (new_stmt, true);
2406 return new_stmt;
2409 /* Predicate each write to memory in LOOP.
2411 This function transforms control flow constructs containing memory
2412 writes of the form:
2414 | for (i = 0; i < N; i++)
2415 | if (cond)
2416 | A[i] = expr;
2418 into the following form that does not contain control flow:
2420 | for (i = 0; i < N; i++)
2421 | A[i] = cond ? expr : A[i];
2423 The original CFG looks like this:
2425 | bb_0
2426 | i = 0
2427 | end_bb_0
2429 | bb_1
2430 | if (i < N) goto bb_5 else goto bb_2
2431 | end_bb_1
2433 | bb_2
2434 | cond = some_computation;
2435 | if (cond) goto bb_3 else goto bb_4
2436 | end_bb_2
2438 | bb_3
2439 | A[i] = expr;
2440 | goto bb_4
2441 | end_bb_3
2443 | bb_4
2444 | goto bb_1
2445 | end_bb_4
2447 insert_gimplified_predicates inserts the computation of the COND
2448 expression at the beginning of the destination basic block:
2450 | bb_0
2451 | i = 0
2452 | end_bb_0
2454 | bb_1
2455 | if (i < N) goto bb_5 else goto bb_2
2456 | end_bb_1
2458 | bb_2
2459 | cond = some_computation;
2460 | if (cond) goto bb_3 else goto bb_4
2461 | end_bb_2
2463 | bb_3
2464 | cond = some_computation;
2465 | A[i] = expr;
2466 | goto bb_4
2467 | end_bb_3
2469 | bb_4
2470 | goto bb_1
2471 | end_bb_4
2473 predicate_statements is then predicating the memory write as follows:
2475 | bb_0
2476 | i = 0
2477 | end_bb_0
2479 | bb_1
2480 | if (i < N) goto bb_5 else goto bb_2
2481 | end_bb_1
2483 | bb_2
2484 | if (cond) goto bb_3 else goto bb_4
2485 | end_bb_2
2487 | bb_3
2488 | cond = some_computation;
2489 | A[i] = cond ? expr : A[i];
2490 | goto bb_4
2491 | end_bb_3
2493 | bb_4
2494 | goto bb_1
2495 | end_bb_4
2497 and finally combine_blocks removes the basic block boundaries making
2498 the loop vectorizable:
2500 | bb_0
2501 | i = 0
2502 | if (i < N) goto bb_5 else goto bb_1
2503 | end_bb_0
2505 | bb_1
2506 | cond = some_computation;
2507 | A[i] = cond ? expr : A[i];
2508 | if (i < N) goto bb_5 else goto bb_4
2509 | end_bb_1
2511 | bb_4
2512 | goto bb_1
2513 | end_bb_4
2516 static void
2517 predicate_statements (loop_p loop)
2519 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2520 auto_vec<int, 1> vect_sizes;
2521 auto_vec<tree, 1> vect_masks;
2522 hash_set<tree_ssa_name_hash> ssa_names;
2524 for (i = 1; i < orig_loop_num_nodes; i++)
2526 gimple_stmt_iterator gsi;
2527 basic_block bb = ifc_bbs[i];
2528 tree cond = bb_predicate (bb);
2529 bool swap;
2530 int index;
2532 if (is_true_predicate (cond))
2533 continue;
2535 swap = false;
2536 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2538 swap = true;
2539 cond = TREE_OPERAND (cond, 0);
2542 vect_sizes.truncate (0);
2543 vect_masks.truncate (0);
2545 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2547 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2548 tree lhs;
2549 if (!stmt)
2551 else if (is_false_predicate (cond)
2552 && gimple_vdef (stmt))
2554 unlink_stmt_vdef (stmt);
2555 gsi_remove (&gsi, true);
2556 release_defs (stmt);
2557 continue;
2559 else if (gimple_plf (stmt, GF_PLF_2)
2560 && is_gimple_assign (stmt))
2562 tree lhs = gimple_assign_lhs (stmt);
2563 tree mask;
2564 gimple *new_stmt;
2565 gimple_seq stmts = NULL;
2566 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2567 /* We checked before setting GF_PLF_2 that an equivalent
2568 integer mode exists. */
2569 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2570 if (!vect_sizes.is_empty ()
2571 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2572 /* Use created mask. */
2573 mask = vect_masks[index];
2574 else
2576 if (COMPARISON_CLASS_P (cond))
2577 mask = gimple_build (&stmts, TREE_CODE (cond),
2578 boolean_type_node,
2579 TREE_OPERAND (cond, 0),
2580 TREE_OPERAND (cond, 1));
2581 else
2582 mask = cond;
2584 if (swap)
2586 tree true_val
2587 = constant_boolean_node (true, TREE_TYPE (mask));
2588 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2589 TREE_TYPE (mask), mask, true_val);
2591 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2593 /* Save mask and its size for further use. */
2594 vect_sizes.safe_push (bitsize);
2595 vect_masks.safe_push (mask);
2597 if (gimple_assign_single_p (stmt))
2598 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2599 else
2600 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2602 gsi_replace (&gsi, new_stmt, true);
2604 else if (((lhs = gimple_assign_lhs (stmt)), true)
2605 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2606 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2607 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2608 && arith_code_with_undefined_signed_overflow
2609 (gimple_assign_rhs_code (stmt)))
2611 gsi_remove (&gsi, true);
2612 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2613 bool first = true;
2614 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2615 !gsi_end_p (gsi2);)
2617 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2618 gsi_remove (&gsi2, false);
2619 if (first)
2621 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2622 first = false;
2624 else
2625 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2628 else if (gimple_vdef (stmt))
2630 tree lhs = gimple_assign_lhs (stmt);
2631 tree rhs = gimple_assign_rhs1 (stmt);
2632 tree type = TREE_TYPE (lhs);
2634 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2635 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2636 if (swap)
2637 std::swap (lhs, rhs);
2638 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2639 NULL_TREE, true, GSI_SAME_STMT);
2640 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2641 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2642 update_stmt (stmt);
2645 if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
2646 && is_gimple_call (gsi_stmt (gsi)))
2648 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2649 This will cause the vectorizer to match the "in branch"
2650 clone variants, and serves to build the mask vector
2651 in a natural way. */
2652 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2653 tree orig_fn = gimple_call_fn (call);
2654 int orig_nargs = gimple_call_num_args (call);
2655 auto_vec<tree> args;
2656 args.safe_push (orig_fn);
2657 for (int i = 0; i < orig_nargs; i++)
2658 args.safe_push (gimple_call_arg (call, i));
2659 args.safe_push (cond);
2661 /* Replace the call with a IFN_MASK_CALL that has the extra
2662 condition parameter. */
2663 gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
2664 args);
2665 gimple_call_set_lhs (new_call, gimple_call_lhs (call));
2666 gsi_replace (&gsi, new_call, true);
2669 lhs = gimple_get_lhs (gsi_stmt (gsi));
2670 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2671 ssa_names.add (lhs);
2672 gsi_next (&gsi);
2674 ssa_names.empty ();
2678 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2679 other than the exit and latch of the LOOP. Also resets the
2680 GIMPLE_DEBUG information. */
2682 static void
2683 remove_conditions_and_labels (loop_p loop)
2685 gimple_stmt_iterator gsi;
2686 unsigned int i;
2688 for (i = 0; i < loop->num_nodes; i++)
2690 basic_block bb = ifc_bbs[i];
2692 if (bb_with_exit_edge_p (loop, bb)
2693 || bb == loop->latch)
2694 continue;
2696 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2697 switch (gimple_code (gsi_stmt (gsi)))
2699 case GIMPLE_COND:
2700 case GIMPLE_LABEL:
2701 gsi_remove (&gsi, true);
2702 break;
2704 case GIMPLE_DEBUG:
2705 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2706 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2708 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2709 update_stmt (gsi_stmt (gsi));
2711 gsi_next (&gsi);
2712 break;
2714 default:
2715 gsi_next (&gsi);
2720 /* Combine all the basic blocks from LOOP into one or two super basic
2721 blocks. Replace PHI nodes with conditional modify expressions. */
2723 static void
2724 combine_blocks (class loop *loop)
2726 basic_block bb, exit_bb, merge_target_bb;
2727 unsigned int orig_loop_num_nodes = loop->num_nodes;
2728 unsigned int i;
2729 edge e;
2730 edge_iterator ei;
2732 remove_conditions_and_labels (loop);
2733 insert_gimplified_predicates (loop);
2734 predicate_all_scalar_phis (loop);
2736 if (need_to_predicate || need_to_rewrite_undefined)
2737 predicate_statements (loop);
2739 /* Merge basic blocks. */
2740 exit_bb = NULL;
2741 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2742 for (i = 0; i < orig_loop_num_nodes; i++)
2744 bb = ifc_bbs[i];
2745 predicated[i] = !is_true_predicate (bb_predicate (bb));
2746 free_bb_predicate (bb);
2747 if (bb_with_exit_edge_p (loop, bb))
2749 gcc_assert (exit_bb == NULL);
2750 exit_bb = bb;
2753 gcc_assert (exit_bb != loop->latch);
2755 merge_target_bb = loop->header;
2757 /* Get at the virtual def valid for uses starting at the first block
2758 we merge into the header. Without a virtual PHI the loop has the
2759 same virtual use on all stmts. */
2760 gphi *vphi = get_virtual_phi (loop->header);
2761 tree last_vdef = NULL_TREE;
2762 if (vphi)
2764 last_vdef = gimple_phi_result (vphi);
2765 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2766 ! gsi_end_p (gsi); gsi_next (&gsi))
2767 if (gimple_vdef (gsi_stmt (gsi)))
2768 last_vdef = gimple_vdef (gsi_stmt (gsi));
2770 for (i = 1; i < orig_loop_num_nodes; i++)
2772 gimple_stmt_iterator gsi;
2773 gimple_stmt_iterator last;
2775 bb = ifc_bbs[i];
2777 if (bb == exit_bb || bb == loop->latch)
2778 continue;
2780 /* We release virtual PHIs late because we have to propagate them
2781 out using the current VUSE. The def might be the one used
2782 after the loop. */
2783 vphi = get_virtual_phi (bb);
2784 if (vphi)
2786 /* When there's just loads inside the loop a stray virtual
2787 PHI merging the uses can appear, update last_vdef from
2788 it. */
2789 if (!last_vdef)
2790 last_vdef = gimple_phi_arg_def (vphi, 0);
2791 imm_use_iterator iter;
2792 use_operand_p use_p;
2793 gimple *use_stmt;
2794 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2796 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2797 SET_USE (use_p, last_vdef);
2799 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2800 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2801 gsi = gsi_for_stmt (vphi);
2802 remove_phi_node (&gsi, true);
2805 /* Make stmts member of loop->header and clear range info from all stmts
2806 in BB which is now no longer executed conditional on a predicate we
2807 could have derived it from. */
2808 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2810 gimple *stmt = gsi_stmt (gsi);
2811 gimple_set_bb (stmt, merge_target_bb);
2812 /* Update virtual operands. */
2813 if (last_vdef)
2815 use_operand_p use_p = ssa_vuse_operand (stmt);
2816 if (use_p
2817 && USE_FROM_PTR (use_p) != last_vdef)
2818 SET_USE (use_p, last_vdef);
2819 if (gimple_vdef (stmt))
2820 last_vdef = gimple_vdef (stmt);
2822 else
2823 /* If this is the first load we arrive at update last_vdef
2824 so we handle stray PHIs correctly. */
2825 last_vdef = gimple_vuse (stmt);
2826 if (predicated[i])
2828 ssa_op_iter i;
2829 tree op;
2830 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2831 reset_flow_sensitive_info (op);
2835 /* Update stmt list. */
2836 last = gsi_last_bb (merge_target_bb);
2837 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2838 set_bb_seq (bb, NULL);
2841 /* Fixup virtual operands in the exit block. */
2842 if (exit_bb
2843 && exit_bb != loop->header)
2845 /* We release virtual PHIs late because we have to propagate them
2846 out using the current VUSE. The def might be the one used
2847 after the loop. */
2848 vphi = get_virtual_phi (exit_bb);
2849 if (vphi)
2851 /* When there's just loads inside the loop a stray virtual
2852 PHI merging the uses can appear, update last_vdef from
2853 it. */
2854 if (!last_vdef)
2855 last_vdef = gimple_phi_arg_def (vphi, 0);
2856 imm_use_iterator iter;
2857 use_operand_p use_p;
2858 gimple *use_stmt;
2859 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2861 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2862 SET_USE (use_p, last_vdef);
2864 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2865 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2866 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2867 remove_phi_node (&gsi, true);
2871 /* Now remove all the edges in the loop, except for those from the exit
2872 block and delete the blocks we elided. */
2873 for (i = 1; i < orig_loop_num_nodes; i++)
2875 bb = ifc_bbs[i];
2877 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2879 if (e->src == exit_bb)
2880 ei_next (&ei);
2881 else
2882 remove_edge (e);
2885 for (i = 1; i < orig_loop_num_nodes; i++)
2887 bb = ifc_bbs[i];
2889 if (bb == exit_bb || bb == loop->latch)
2890 continue;
2892 delete_basic_block (bb);
2895 /* Re-connect the exit block. */
2896 if (exit_bb != NULL)
2898 if (exit_bb != loop->header)
2900 /* Connect this node to loop header. */
2901 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2902 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2905 /* Redirect non-exit edges to loop->latch. */
2906 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2908 if (!loop_exit_edge_p (loop, e))
2909 redirect_edge_and_branch (e, loop->latch);
2911 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2913 else
2915 /* If the loop does not have an exit, reconnect header and latch. */
2916 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2917 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2920 /* If possible, merge loop header to the block with the exit edge.
2921 This reduces the number of basic blocks to two, to please the
2922 vectorizer that handles only loops with two nodes. */
2923 if (exit_bb
2924 && exit_bb != loop->header)
2926 if (can_merge_blocks_p (loop->header, exit_bb))
2927 merge_blocks (loop->header, exit_bb);
2930 free (ifc_bbs);
2931 ifc_bbs = NULL;
2932 free (predicated);
2935 /* Version LOOP before if-converting it; the original loop
2936 will be if-converted, the new copy of the loop will not,
2937 and the LOOP_VECTORIZED internal call will be guarding which
2938 loop to execute. The vectorizer pass will fold this
2939 internal call into either true or false.
2941 Note that this function intentionally invalidates profile. Both edges
2942 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2943 consistent after the condition is folded in the vectorizer. */
2945 static class loop *
2946 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2948 basic_block cond_bb;
2949 tree cond = make_ssa_name (boolean_type_node);
2950 class loop *new_loop;
2951 gimple *g;
2952 gimple_stmt_iterator gsi;
2953 unsigned int save_length = 0;
2955 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2956 build_int_cst (integer_type_node, loop->num),
2957 integer_zero_node);
2958 gimple_call_set_lhs (g, cond);
2960 void **saved_preds = NULL;
2961 if (any_complicated_phi || need_to_predicate)
2963 /* Save BB->aux around loop_version as that uses the same field. */
2964 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2965 saved_preds = XALLOCAVEC (void *, save_length);
2966 for (unsigned i = 0; i < save_length; i++)
2967 saved_preds[i] = ifc_bbs[i]->aux;
2970 initialize_original_copy_tables ();
2971 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2972 is re-merged in the vectorizer. */
2973 new_loop = loop_version (loop, cond, &cond_bb,
2974 profile_probability::always (),
2975 profile_probability::always (),
2976 profile_probability::always (),
2977 profile_probability::always (), true);
2978 free_original_copy_tables ();
2980 if (any_complicated_phi || need_to_predicate)
2981 for (unsigned i = 0; i < save_length; i++)
2982 ifc_bbs[i]->aux = saved_preds[i];
2984 if (new_loop == NULL)
2985 return NULL;
2987 new_loop->dont_vectorize = true;
2988 new_loop->force_vectorize = false;
2989 gsi = gsi_last_bb (cond_bb);
2990 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2991 if (preds)
2992 preds->safe_push (g);
2993 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2994 update_ssa (TODO_update_ssa_no_phi);
2995 return new_loop;
2998 /* Return true when LOOP satisfies the follow conditions that will
2999 allow it to be recognized by the vectorizer for outer-loop
3000 vectorization:
3001 - The loop is not the root node of the loop tree.
3002 - The loop has exactly one inner loop.
3003 - The loop has a single exit.
3004 - The loop header has a single successor, which is the inner
3005 loop header.
3006 - Each of the inner and outer loop latches have a single
3007 predecessor.
3008 - The loop exit block has a single predecessor, which is the
3009 inner loop's exit block. */
3011 static bool
3012 versionable_outer_loop_p (class loop *loop)
3014 if (!loop_outer (loop)
3015 || loop->dont_vectorize
3016 || !loop->inner
3017 || loop->inner->next
3018 || !single_exit (loop)
3019 || !single_succ_p (loop->header)
3020 || single_succ (loop->header) != loop->inner->header
3021 || !single_pred_p (loop->latch)
3022 || !single_pred_p (loop->inner->latch))
3023 return false;
3025 basic_block outer_exit = single_pred (loop->latch);
3026 basic_block inner_exit = single_pred (loop->inner->latch);
3028 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3029 return false;
3031 if (dump_file)
3032 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3034 return true;
3037 /* Performs splitting of critical edges. Skip splitting and return false
3038 if LOOP will not be converted because:
3040 - LOOP is not well formed.
3041 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3043 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3045 static bool
3046 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3048 basic_block *body;
3049 basic_block bb;
3050 unsigned int num = loop->num_nodes;
3051 unsigned int i;
3052 gimple *stmt;
3053 edge e;
3054 edge_iterator ei;
3055 auto_vec<edge> critical_edges;
3057 /* Loop is not well formed. */
3058 if (loop->inner)
3059 return false;
3061 body = get_loop_body (loop);
3062 for (i = 0; i < num; i++)
3064 bb = body[i];
3065 if (!aggressive_if_conv
3066 && phi_nodes (bb)
3067 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3069 if (dump_file && (dump_flags & TDF_DETAILS))
3070 fprintf (dump_file,
3071 "BB %d has complicated PHI with more than %u args.\n",
3072 bb->index, MAX_PHI_ARG_NUM);
3074 free (body);
3075 return false;
3077 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3078 continue;
3080 stmt = last_stmt (bb);
3081 /* Skip basic blocks not ending with conditional branch. */
3082 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3083 continue;
3085 FOR_EACH_EDGE (e, ei, bb->succs)
3086 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3087 critical_edges.safe_push (e);
3089 free (body);
3091 while (critical_edges.length () > 0)
3093 e = critical_edges.pop ();
3094 /* Don't split if bb can be predicated along non-critical edge. */
3095 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3096 split_edge (e);
3099 return true;
3102 /* Delete redundant statements produced by predication which prevents
3103 loop vectorization. */
3105 static void
3106 ifcvt_local_dce (class loop *loop)
3108 gimple *stmt;
3109 gimple *stmt1;
3110 gimple *phi;
3111 gimple_stmt_iterator gsi;
3112 auto_vec<gimple *> worklist;
3113 enum gimple_code code;
3114 use_operand_p use_p;
3115 imm_use_iterator imm_iter;
3117 /* The loop has a single BB only. */
3118 basic_block bb = loop->header;
3119 tree latch_vdef = NULL_TREE;
3121 worklist.create (64);
3122 /* Consider all phi as live statements. */
3123 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3125 phi = gsi_stmt (gsi);
3126 gimple_set_plf (phi, GF_PLF_2, true);
3127 worklist.safe_push (phi);
3128 if (virtual_operand_p (gimple_phi_result (phi)))
3129 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3131 /* Consider load/store statements, CALL and COND as live. */
3132 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3134 stmt = gsi_stmt (gsi);
3135 if (is_gimple_debug (stmt))
3137 gimple_set_plf (stmt, GF_PLF_2, true);
3138 continue;
3140 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3142 gimple_set_plf (stmt, GF_PLF_2, true);
3143 worklist.safe_push (stmt);
3144 continue;
3146 code = gimple_code (stmt);
3147 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3149 gimple_set_plf (stmt, GF_PLF_2, true);
3150 worklist.safe_push (stmt);
3151 continue;
3153 gimple_set_plf (stmt, GF_PLF_2, false);
3155 if (code == GIMPLE_ASSIGN)
3157 tree lhs = gimple_assign_lhs (stmt);
3158 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3160 stmt1 = USE_STMT (use_p);
3161 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3163 gimple_set_plf (stmt, GF_PLF_2, true);
3164 worklist.safe_push (stmt);
3165 break;
3170 /* Propagate liveness through arguments of live stmt. */
3171 while (worklist.length () > 0)
3173 ssa_op_iter iter;
3174 use_operand_p use_p;
3175 tree use;
3177 stmt = worklist.pop ();
3178 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3180 use = USE_FROM_PTR (use_p);
3181 if (TREE_CODE (use) != SSA_NAME)
3182 continue;
3183 stmt1 = SSA_NAME_DEF_STMT (use);
3184 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3185 continue;
3186 gimple_set_plf (stmt1, GF_PLF_2, true);
3187 worklist.safe_push (stmt1);
3190 /* Delete dead statements. */
3191 gsi = gsi_last_bb (bb);
3192 while (!gsi_end_p (gsi))
3194 gimple_stmt_iterator gsiprev = gsi;
3195 gsi_prev (&gsiprev);
3196 stmt = gsi_stmt (gsi);
3197 if (gimple_store_p (stmt) && gimple_vdef (stmt))
3199 tree lhs = gimple_get_lhs (stmt);
3200 ao_ref write;
3201 ao_ref_init (&write, lhs);
3203 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3204 == DSE_STORE_DEAD)
3205 delete_dead_or_redundant_assignment (&gsi, "dead");
3206 gsi = gsiprev;
3207 continue;
3210 if (gimple_plf (stmt, GF_PLF_2))
3212 gsi = gsiprev;
3213 continue;
3215 if (dump_file && (dump_flags & TDF_DETAILS))
3217 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3218 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3220 gsi_remove (&gsi, true);
3221 release_defs (stmt);
3222 gsi = gsiprev;
3226 /* Return true if VALUE is already available on edge PE. */
3228 static bool
3229 ifcvt_available_on_edge_p (edge pe, tree value)
3231 if (is_gimple_min_invariant (value))
3232 return true;
3234 if (TREE_CODE (value) == SSA_NAME)
3236 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3237 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3238 return true;
3241 return false;
3244 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3245 edge PE. */
3247 static bool
3248 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3250 if (auto *call = dyn_cast<gcall *> (stmt))
3252 if (gimple_call_internal_p (call)
3253 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3254 return false;
3256 else if (auto *assign = dyn_cast<gassign *> (stmt))
3258 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3259 return false;
3261 else
3262 return false;
3264 if (gimple_has_side_effects (stmt)
3265 || gimple_could_trap_p (stmt)
3266 || stmt_could_throw_p (cfun, stmt)
3267 || gimple_vdef (stmt)
3268 || gimple_vuse (stmt))
3269 return false;
3271 int num_args = gimple_num_args (stmt);
3272 if (pe != loop_preheader_edge (loop))
3274 for (int i = 0; i < num_args; ++i)
3275 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3276 return false;
3278 else
3280 for (int i = 0; i < num_args; ++i)
3281 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3282 return false;
3285 return true;
3288 /* Hoist invariant statements from LOOP to edge PE. */
3290 static void
3291 ifcvt_hoist_invariants (class loop *loop, edge pe)
3293 gimple_stmt_iterator hoist_gsi = {};
3294 unsigned int num_blocks = loop->num_nodes;
3295 basic_block *body = get_loop_body (loop);
3296 for (unsigned int i = 0; i < num_blocks; ++i)
3297 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3299 gimple *stmt = gsi_stmt (gsi);
3300 if (ifcvt_can_hoist (loop, pe, stmt))
3302 /* Once we've hoisted one statement, insert other statements
3303 after it. */
3304 gsi_remove (&gsi, false);
3305 if (hoist_gsi.ptr)
3306 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3307 else
3309 gsi_insert_on_edge_immediate (pe, stmt);
3310 hoist_gsi = gsi_for_stmt (stmt);
3312 continue;
3314 gsi_next (&gsi);
3316 free (body);
3319 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3320 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3321 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3322 if not NULL, will hold the tree representing the base struct of this
3323 bitfield. */
3325 static tree
3326 get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3327 tree *struct_expr)
3329 tree comp_ref = write ? gimple_assign_lhs (stmt)
3330 : gimple_assign_rhs1 (stmt);
3332 tree field_decl = TREE_OPERAND (comp_ref, 1);
3333 tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3335 /* Bail out if the representative is not a suitable type for a scalar
3336 register variable. */
3337 if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
3338 return NULL_TREE;
3340 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3341 precision. */
3342 unsigned HOST_WIDE_INT bf_prec
3343 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3344 if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3345 return NULL_TREE;
3347 if (struct_expr)
3348 *struct_expr = TREE_OPERAND (comp_ref, 0);
3350 if (bitpos)
3352 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3353 where our bitfield starts in relation to the container REP_DECL. The
3354 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3355 us how many bytes from the start of the structure there are until the
3356 start of the group of bitfield members the FIELD_DECL belongs to,
3357 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3358 position our actual bitfield member starts. For the container
3359 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3360 us the distance between the start of the structure and the start of
3361 the container, though the first is in bytes and the later other in
3362 bits. With this in mind we calculate the bit position of our new
3363 BITFIELD_REF by subtracting the number of bits between the start of
3364 the structure and the container from the number of bits from the start
3365 of the structure and the actual bitfield member. */
3366 tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3367 DECL_FIELD_OFFSET (field_decl),
3368 build_int_cst (bitsizetype, BITS_PER_UNIT));
3369 bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3370 DECL_FIELD_BIT_OFFSET (field_decl));
3371 tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3372 DECL_FIELD_OFFSET (rep_decl),
3373 build_int_cst (bitsizetype, BITS_PER_UNIT));
3374 rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3375 DECL_FIELD_BIT_OFFSET (rep_decl));
3377 *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3380 return rep_decl;
3384 /* Lowers the bitfield described by DATA.
3385 For a write like:
3387 struct.bf = _1;
3389 lower to:
3391 __ifc_1 = struct.<representative>;
3392 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3393 struct.<representative> = __ifc_2;
3395 For a read:
3397 _1 = struct.bf;
3399 lower to:
3401 __ifc_1 = struct.<representative>;
3402 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3404 where representative is a legal load that contains the bitfield value,
3405 bitsize is the size of the bitfield and bitpos the offset to the start of
3406 the bitfield within the representative. */
3408 static void
3409 lower_bitfield (gassign *stmt, bool write)
3411 tree struct_expr;
3412 tree bitpos;
3413 tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
3414 tree rep_type = TREE_TYPE (rep_decl);
3415 tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
3417 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3418 if (dump_file && (dump_flags & TDF_DETAILS))
3420 fprintf (dump_file, "Lowering:\n");
3421 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3422 fprintf (dump_file, "to:\n");
3425 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3426 defining SSA_NAME. */
3427 tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
3428 NULL_TREE);
3429 tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
3431 if (dump_file && (dump_flags & TDF_DETAILS))
3432 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3434 if (write)
3436 new_val = ifc_temp_var (rep_type,
3437 build3 (BIT_INSERT_EXPR, rep_type, new_val,
3438 unshare_expr (gimple_assign_rhs1 (stmt)),
3439 bitpos), &gsi);
3441 if (dump_file && (dump_flags & TDF_DETAILS))
3442 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3444 gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
3445 new_val);
3446 gimple_move_vops (new_stmt, stmt);
3447 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3450 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3452 else
3454 tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
3455 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
3456 bitpos);
3457 new_val = ifc_temp_var (bf_type, bfr, &gsi);
3459 gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
3460 new_val);
3461 gimple_move_vops (new_stmt, stmt);
3462 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3464 if (dump_file && (dump_flags & TDF_DETAILS))
3465 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3468 gsi_remove (&gsi, true);
3471 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3472 with data structures representing these bitfields. */
3474 static bool
3475 bitfields_to_lower_p (class loop *loop,
3476 vec <gassign *> &reads_to_lower,
3477 vec <gassign *> &writes_to_lower)
3479 gimple_stmt_iterator gsi;
3481 if (dump_file && (dump_flags & TDF_DETAILS))
3483 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3486 for (unsigned i = 0; i < loop->num_nodes; ++i)
3488 basic_block bb = ifc_bbs[i];
3489 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3491 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3492 if (!stmt)
3493 continue;
3495 tree op = gimple_assign_lhs (stmt);
3496 bool write = TREE_CODE (op) == COMPONENT_REF;
3498 if (!write)
3499 op = gimple_assign_rhs1 (stmt);
3501 if (TREE_CODE (op) != COMPONENT_REF)
3502 continue;
3504 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3506 if (dump_file && (dump_flags & TDF_DETAILS))
3507 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3509 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3511 if (dump_file && (dump_flags & TDF_DETAILS))
3512 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3513 " field type is not Integral.\n");
3514 return false;
3517 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3520 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3521 " representative is BLKmode.\n");
3522 return false;
3525 if (dump_file && (dump_flags & TDF_DETAILS))
3526 fprintf (dump_file, "\tBitfield OK to lower.\n");
3527 if (write)
3528 writes_to_lower.safe_push (stmt);
3529 else
3530 reads_to_lower.safe_push (stmt);
3534 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
3538 /* If-convert LOOP when it is legal. For the moment this pass has no
3539 profitability analysis. Returns non-zero todo flags when something
3540 changed. */
3542 unsigned int
3543 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3545 unsigned int todo = 0;
3546 bool aggressive_if_conv;
3547 class loop *rloop;
3548 auto_vec <gassign *, 4> reads_to_lower;
3549 auto_vec <gassign *, 4> writes_to_lower;
3550 bitmap exit_bbs;
3551 edge pe;
3552 auto_vec<data_reference_p, 10> refs;
3554 again:
3555 rloop = NULL;
3556 ifc_bbs = NULL;
3557 need_to_lower_bitfields = false;
3558 need_to_ifcvt = false;
3559 need_to_predicate = false;
3560 need_to_rewrite_undefined = false;
3561 any_complicated_phi = false;
3563 /* Apply more aggressive if-conversion when loop or its outer loop were
3564 marked with simd pragma. When that's the case, we try to if-convert
3565 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3566 aggressive_if_conv = loop->force_vectorize;
3567 if (!aggressive_if_conv)
3569 class loop *outer_loop = loop_outer (loop);
3570 if (outer_loop && outer_loop->force_vectorize)
3571 aggressive_if_conv = true;
3574 if (!single_exit (loop))
3575 goto cleanup;
3577 /* If there are more than two BBs in the loop then there is at least one if
3578 to convert. */
3579 if (loop->num_nodes > 2
3580 && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
3581 goto cleanup;
3583 ifc_bbs = get_loop_body_in_if_conv_order (loop);
3584 if (!ifc_bbs)
3586 if (dump_file && (dump_flags & TDF_DETAILS))
3587 fprintf (dump_file, "Irreducible loop\n");
3588 goto cleanup;
3591 if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
3592 goto cleanup;
3594 if (loop->num_nodes > 2)
3596 need_to_ifcvt = true;
3598 if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
3599 goto cleanup;
3601 if ((need_to_predicate || any_complicated_phi)
3602 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3603 || loop->dont_vectorize))
3604 goto cleanup;
3607 if ((flag_tree_loop_vectorize || loop->force_vectorize)
3608 && !loop->dont_vectorize)
3609 need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
3610 writes_to_lower);
3612 if (!need_to_ifcvt && !need_to_lower_bitfields)
3613 goto cleanup;
3615 /* The edge to insert invariant stmts on. */
3616 pe = loop_preheader_edge (loop);
3618 /* Since we have no cost model, always version loops unless the user
3619 specified -ftree-loop-if-convert or unless versioning is required.
3620 Either version this loop, or if the pattern is right for outer-loop
3621 vectorization, version the outer loop. In the latter case we will
3622 still if-convert the original inner loop. */
3623 if (need_to_lower_bitfields
3624 || need_to_predicate
3625 || any_complicated_phi
3626 || flag_tree_loop_if_convert != 1)
3628 class loop *vloop
3629 = (versionable_outer_loop_p (loop_outer (loop))
3630 ? loop_outer (loop) : loop);
3631 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3632 if (nloop == NULL)
3633 goto cleanup;
3634 if (vloop != loop)
3636 /* If versionable_outer_loop_p decided to version the
3637 outer loop, version also the inner loop of the non-vectorized
3638 loop copy. So we transform:
3639 loop1
3640 loop2
3641 into:
3642 if (LOOP_VECTORIZED (1, 3))
3644 loop1
3645 loop2
3647 else
3648 loop3 (copy of loop1)
3649 if (LOOP_VECTORIZED (4, 5))
3650 loop4 (copy of loop2)
3651 else
3652 loop5 (copy of loop4) */
3653 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3654 rloop = nloop->inner;
3656 else
3657 /* If we versioned loop then make sure to insert invariant
3658 stmts before the .LOOP_VECTORIZED check since the vectorizer
3659 will re-use that for things like runtime alias versioning
3660 whose condition can end up using those invariants. */
3661 pe = single_pred_edge (gimple_bb (preds->last ()));
3664 if (need_to_lower_bitfields)
3666 if (dump_file && (dump_flags & TDF_DETAILS))
3668 fprintf (dump_file, "-------------------------\n");
3669 fprintf (dump_file, "Start lowering bitfields\n");
3671 while (!reads_to_lower.is_empty ())
3672 lower_bitfield (reads_to_lower.pop (), false);
3673 while (!writes_to_lower.is_empty ())
3674 lower_bitfield (writes_to_lower.pop (), true);
3676 if (dump_file && (dump_flags & TDF_DETAILS))
3678 fprintf (dump_file, "Done lowering bitfields\n");
3679 fprintf (dump_file, "-------------------------\n");
3682 if (need_to_ifcvt)
3684 /* Now all statements are if-convertible. Combine all the basic
3685 blocks into one huge basic block doing the if-conversion
3686 on-the-fly. */
3687 combine_blocks (loop);
3690 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3691 and stores are involved. CSE only the loop body, not the entry
3692 PHIs, those are to be kept in sync with the non-if-converted copy.
3693 ??? We'll still keep dead stores though. */
3694 exit_bbs = BITMAP_ALLOC (NULL);
3695 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3696 bitmap_set_bit (exit_bbs, loop->latch->index);
3698 std::pair <tree, tree> *name_pair;
3699 unsigned ssa_names_idx;
3700 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3701 replace_uses_by (name_pair->first, name_pair->second);
3702 redundant_ssa_names.release ();
3704 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3706 /* Delete dead predicate computations. */
3707 ifcvt_local_dce (loop);
3708 BITMAP_FREE (exit_bbs);
3710 ifcvt_hoist_invariants (loop, pe);
3712 todo |= TODO_cleanup_cfg;
3714 cleanup:
3715 data_reference_p dr;
3716 unsigned int i;
3717 for (i = 0; refs.iterate (i, &dr); i++)
3719 free (dr->aux);
3720 free_data_ref (dr);
3722 refs.truncate (0);
3724 if (ifc_bbs)
3726 unsigned int i;
3728 for (i = 0; i < loop->num_nodes; i++)
3729 free_bb_predicate (ifc_bbs[i]);
3731 free (ifc_bbs);
3732 ifc_bbs = NULL;
3734 if (rloop != NULL)
3736 loop = rloop;
3737 reads_to_lower.truncate (0);
3738 writes_to_lower.truncate (0);
3739 goto again;
3742 return todo;
3745 /* Tree if-conversion pass management. */
3747 namespace {
3749 const pass_data pass_data_if_conversion =
3751 GIMPLE_PASS, /* type */
3752 "ifcvt", /* name */
3753 OPTGROUP_NONE, /* optinfo_flags */
3754 TV_TREE_LOOP_IFCVT, /* tv_id */
3755 ( PROP_cfg | PROP_ssa ), /* properties_required */
3756 0, /* properties_provided */
3757 0, /* properties_destroyed */
3758 0, /* todo_flags_start */
3759 0, /* todo_flags_finish */
3762 class pass_if_conversion : public gimple_opt_pass
3764 public:
3765 pass_if_conversion (gcc::context *ctxt)
3766 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3769 /* opt_pass methods: */
3770 bool gate (function *) final override;
3771 unsigned int execute (function *) final override;
3773 }; // class pass_if_conversion
3775 bool
3776 pass_if_conversion::gate (function *fun)
3778 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3779 && flag_tree_loop_if_convert != 0)
3780 || flag_tree_loop_if_convert == 1);
3783 unsigned int
3784 pass_if_conversion::execute (function *fun)
3786 unsigned todo = 0;
3788 if (number_of_loops (fun) <= 1)
3789 return 0;
3791 auto_vec<gimple *> preds;
3792 for (auto loop : loops_list (cfun, 0))
3793 if (flag_tree_loop_if_convert == 1
3794 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3795 && !loop->dont_vectorize))
3796 todo |= tree_if_conversion (loop, &preds);
3798 if (todo)
3800 free_numbers_of_iterations_estimates (fun);
3801 scev_reset ();
3804 if (flag_checking)
3806 basic_block bb;
3807 FOR_EACH_BB_FN (bb, fun)
3808 gcc_assert (!bb->aux);
3811 /* Perform IL update now, it might elide some loops. */
3812 if (todo & TODO_cleanup_cfg)
3814 cleanup_tree_cfg ();
3815 if (need_ssa_update_p (fun))
3816 todo |= TODO_update_ssa;
3818 if (todo & TODO_update_ssa_any)
3819 update_ssa (todo & TODO_update_ssa_any);
3821 /* If if-conversion elided the loop fall back to the original one. */
3822 for (unsigned i = 0; i < preds.length (); ++i)
3824 gimple *g = preds[i];
3825 if (!gimple_bb (g))
3826 continue;
3827 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3828 unsigned orig_loop = tree_to_uhwi (gimple_call_arg (g, 1));
3829 if (!get_loop (fun, ifcvt_loop) || !get_loop (fun, orig_loop))
3831 if (dump_file)
3832 fprintf (dump_file, "If-converted loop vanished\n");
3833 fold_loop_internal_call (g, boolean_false_node);
3837 return 0;
3840 } // anon namespace
3842 gimple_opt_pass *
3843 make_pass_if_conversion (gcc::context *ctxt)
3845 return new pass_if_conversion (ctxt);