Small fix for -fdump-ada-spec
[official-gcc.git] / gcc / tree-if-conv.cc
blobb19eebd2dfb2c71173e1a237603fff0457431404
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "expr.h"
95 #include "optabs-query.h"
96 #include "gimple-pretty-print.h"
97 #include "alias.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
107 #include "cfgloop.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
114 #include "dbgcnt.h"
115 #include "tree-hash-traits.h"
116 #include "varasm.h"
117 #include "builtins.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
125 #include "tree-eh.h"
127 /* For lang_hooks.types.type_for_mode. */
128 #include "langhooks.h"
130 /* Only handle PHIs with no more arguments unless we are asked to by
131 simd pragma. */
132 #define MAX_PHI_ARG_NUM \
133 ((unsigned) param_max_tree_if_conversion_phi_args)
135 /* True if we've converted a statement that was only executed when some
136 condition C was true, and if for correctness we need to predicate the
137 statement to ensure that it is a no-op when C is false. See
138 predicate_statements for the kinds of predication we support. */
139 static bool need_to_predicate;
141 /* True if we have to rewrite stmts that may invoke undefined behavior
142 when a condition C was false so it doesn't if it is always executed.
143 See predicate_statements for the kinds of predication we support. */
144 static bool need_to_rewrite_undefined;
146 /* Indicate if there are any complicated PHIs that need to be handled in
147 if-conversion. Complicated PHI has more than two arguments and can't
148 be degenerated to two arguments PHI. See more information in comment
149 before phi_convertible_by_degenerating_args. */
150 static bool any_complicated_phi;
152 /* True if we have bitfield accesses we can lower. */
153 static bool need_to_lower_bitfields;
155 /* True if there is any ifcvting to be done. */
156 static bool need_to_ifcvt;
158 /* Hash for struct innermost_loop_behavior. It depends on the user to
159 free the memory. */
161 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
163 static inline hashval_t hash (const value_type &);
164 static inline bool equal (const value_type &,
165 const compare_type &);
168 inline hashval_t
169 innermost_loop_behavior_hash::hash (const value_type &e)
171 hashval_t hash;
173 hash = iterative_hash_expr (e->base_address, 0);
174 hash = iterative_hash_expr (e->offset, hash);
175 hash = iterative_hash_expr (e->init, hash);
176 return iterative_hash_expr (e->step, hash);
179 inline bool
180 innermost_loop_behavior_hash::equal (const value_type &e1,
181 const compare_type &e2)
183 if ((e1->base_address && !e2->base_address)
184 || (!e1->base_address && e2->base_address)
185 || (!e1->offset && e2->offset)
186 || (e1->offset && !e2->offset)
187 || (!e1->init && e2->init)
188 || (e1->init && !e2->init)
189 || (!e1->step && e2->step)
190 || (e1->step && !e2->step))
191 return false;
193 if (e1->base_address && e2->base_address
194 && !operand_equal_p (e1->base_address, e2->base_address, 0))
195 return false;
196 if (e1->offset && e2->offset
197 && !operand_equal_p (e1->offset, e2->offset, 0))
198 return false;
199 if (e1->init && e2->init
200 && !operand_equal_p (e1->init, e2->init, 0))
201 return false;
202 if (e1->step && e2->step
203 && !operand_equal_p (e1->step, e2->step, 0))
204 return false;
206 return true;
209 /* List of basic blocks in if-conversion-suitable order. */
210 static basic_block *ifc_bbs;
212 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
213 static hash_map<innermost_loop_behavior_hash,
214 data_reference_p> *innermost_DR_map;
216 /* Hash table to store <base reference, DR> pairs. */
217 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
219 /* List of redundant SSA names: the first should be replaced by the second. */
220 static vec< std::pair<tree, tree> > redundant_ssa_names;
222 /* Structure used to predicate basic blocks. This is attached to the
223 ->aux field of the BBs in the loop to be if-converted. */
224 struct bb_predicate {
226 /* The condition under which this basic block is executed. */
227 tree predicate;
229 /* PREDICATE is gimplified, and the sequence of statements is
230 recorded here, in order to avoid the duplication of computations
231 that occur in previous conditions. See PR44483. */
232 gimple_seq predicate_gimplified_stmts;
235 /* Returns true when the basic block BB has a predicate. */
237 static inline bool
238 bb_has_predicate (basic_block bb)
240 return bb->aux != NULL;
243 /* Returns the gimplified predicate for basic block BB. */
245 static inline tree
246 bb_predicate (basic_block bb)
248 return ((struct bb_predicate *) bb->aux)->predicate;
251 /* Sets the gimplified predicate COND for basic block BB. */
253 static inline void
254 set_bb_predicate (basic_block bb, tree cond)
256 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
257 && is_gimple_val (TREE_OPERAND (cond, 0)))
258 || is_gimple_val (cond));
259 ((struct bb_predicate *) bb->aux)->predicate = cond;
262 /* Returns the sequence of statements of the gimplification of the
263 predicate for basic block BB. */
265 static inline gimple_seq
266 bb_predicate_gimplified_stmts (basic_block bb)
268 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
271 /* Sets the sequence of statements STMTS of the gimplification of the
272 predicate for basic block BB. */
274 static inline void
275 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
277 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
280 /* Adds the sequence of statements STMTS to the sequence of statements
281 of the predicate for basic block BB. */
283 static inline void
284 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
286 /* We might have updated some stmts in STMTS via force_gimple_operand
287 calling fold_stmt and that producing multiple stmts. Delink immediate
288 uses so update_ssa after loop versioning doesn't get confused for
289 the not yet inserted predicates.
290 ??? This should go away once we reliably avoid updating stmts
291 not in any BB. */
292 for (gimple_stmt_iterator gsi = gsi_start (stmts);
293 !gsi_end_p (gsi); gsi_next (&gsi))
295 gimple *stmt = gsi_stmt (gsi);
296 delink_stmt_imm_use (stmt);
297 gimple_set_modified (stmt, true);
299 gimple_seq_add_seq_without_update
300 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
303 /* Initializes to TRUE the predicate of basic block BB. */
305 static inline void
306 init_bb_predicate (basic_block bb)
308 bb->aux = XNEW (struct bb_predicate);
309 set_bb_predicate_gimplified_stmts (bb, NULL);
310 set_bb_predicate (bb, boolean_true_node);
313 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
315 static inline void
316 release_bb_predicate (basic_block bb)
318 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
319 if (stmts)
321 /* Ensure that these stmts haven't yet been added to a bb. */
322 if (flag_checking)
323 for (gimple_stmt_iterator i = gsi_start (stmts);
324 !gsi_end_p (i); gsi_next (&i))
325 gcc_assert (! gimple_bb (gsi_stmt (i)));
327 /* Discard them. */
328 gimple_seq_discard (stmts);
329 set_bb_predicate_gimplified_stmts (bb, NULL);
333 /* Free the predicate of basic block BB. */
335 static inline void
336 free_bb_predicate (basic_block bb)
338 if (!bb_has_predicate (bb))
339 return;
341 release_bb_predicate (bb);
342 free (bb->aux);
343 bb->aux = NULL;
346 /* Reinitialize predicate of BB with the true predicate. */
348 static inline void
349 reset_bb_predicate (basic_block bb)
351 if (!bb_has_predicate (bb))
352 init_bb_predicate (bb);
353 else
355 release_bb_predicate (bb);
356 set_bb_predicate (bb, boolean_true_node);
360 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
361 the expression EXPR. Inserts the statement created for this
362 computation before GSI and leaves the iterator GSI at the same
363 statement. */
365 static tree
366 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
368 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
369 gimple *stmt = gimple_build_assign (new_name, expr);
370 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
371 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
372 return new_name;
375 /* Return true when COND is a false predicate. */
377 static inline bool
378 is_false_predicate (tree cond)
380 return (cond != NULL_TREE
381 && (cond == boolean_false_node
382 || integer_zerop (cond)));
385 /* Return true when COND is a true predicate. */
387 static inline bool
388 is_true_predicate (tree cond)
390 return (cond == NULL_TREE
391 || cond == boolean_true_node
392 || integer_onep (cond));
395 /* Returns true when BB has a predicate that is not trivial: true or
396 NULL_TREE. */
398 static inline bool
399 is_predicated (basic_block bb)
401 return !is_true_predicate (bb_predicate (bb));
404 /* Parses the predicate COND and returns its comparison code and
405 operands OP0 and OP1. */
407 static enum tree_code
408 parse_predicate (tree cond, tree *op0, tree *op1)
410 gimple *s;
412 if (TREE_CODE (cond) == SSA_NAME
413 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
415 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
417 *op0 = gimple_assign_rhs1 (s);
418 *op1 = gimple_assign_rhs2 (s);
419 return gimple_assign_rhs_code (s);
422 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
424 tree op = gimple_assign_rhs1 (s);
425 tree type = TREE_TYPE (op);
426 enum tree_code code = parse_predicate (op, op0, op1);
428 return code == ERROR_MARK ? ERROR_MARK
429 : invert_tree_comparison (code, HONOR_NANS (type));
432 return ERROR_MARK;
435 if (COMPARISON_CLASS_P (cond))
437 *op0 = TREE_OPERAND (cond, 0);
438 *op1 = TREE_OPERAND (cond, 1);
439 return TREE_CODE (cond);
442 return ERROR_MARK;
445 /* Returns the fold of predicate C1 OR C2 at location LOC. */
447 static tree
448 fold_or_predicates (location_t loc, tree c1, tree c2)
450 tree op1a, op1b, op2a, op2b;
451 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
452 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
454 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
456 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
457 code2, op2a, op2b);
458 if (t)
459 return t;
462 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
465 /* Returns either a COND_EXPR or the folded expression if the folded
466 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
467 a constant or a SSA_NAME. */
469 static tree
470 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
472 /* If COND is comparison r != 0 and r has boolean type, convert COND
473 to SSA_NAME to accept by vect bool pattern. */
474 if (TREE_CODE (cond) == NE_EXPR)
476 tree op0 = TREE_OPERAND (cond, 0);
477 tree op1 = TREE_OPERAND (cond, 1);
478 if (TREE_CODE (op0) == SSA_NAME
479 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
480 && (integer_zerop (op1)))
481 cond = op0;
484 gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
485 type, cond, rhs, lhs);
486 if (cexpr.resimplify (NULL, follow_all_ssa_edges))
488 if (gimple_simplified_result_is_gimple_val (&cexpr))
489 return cexpr.ops[0];
490 else if (cexpr.code == ABS_EXPR)
491 return build1 (ABS_EXPR, type, cexpr.ops[0]);
492 else if (cexpr.code == MIN_EXPR
493 || cexpr.code == MAX_EXPR)
494 return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
497 return build3 (COND_EXPR, type, cond, rhs, lhs);
500 /* Add condition NC to the predicate list of basic block BB. LOOP is
501 the loop to be if-converted. Use predicate of cd-equivalent block
502 for join bb if it exists: we call basic blocks bb1 and bb2
503 cd-equivalent if they are executed under the same condition. */
505 static inline void
506 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
508 tree bc, *tp;
509 basic_block dom_bb;
511 if (is_true_predicate (nc))
512 return;
514 /* If dominance tells us this basic block is always executed,
515 don't record any predicates for it. */
516 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
517 return;
519 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
520 /* We use notion of cd equivalence to get simpler predicate for
521 join block, e.g. if join block has 2 predecessors with predicates
522 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
523 p1 & p2 | p1 & !p2. */
524 if (dom_bb != loop->header
525 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
527 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
528 bc = bb_predicate (dom_bb);
529 if (!is_true_predicate (bc))
530 set_bb_predicate (bb, bc);
531 else
532 gcc_assert (is_true_predicate (bb_predicate (bb)));
533 if (dump_file && (dump_flags & TDF_DETAILS))
534 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
535 dom_bb->index, bb->index);
536 return;
539 if (!is_predicated (bb))
540 bc = nc;
541 else
543 bc = bb_predicate (bb);
544 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
545 if (is_true_predicate (bc))
547 reset_bb_predicate (bb);
548 return;
552 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
553 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
554 tp = &TREE_OPERAND (bc, 0);
555 else
556 tp = &bc;
557 if (!is_gimple_val (*tp))
559 gimple_seq stmts;
560 *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
561 add_bb_predicate_gimplified_stmts (bb, stmts);
563 set_bb_predicate (bb, bc);
566 /* Add the condition COND to the previous condition PREV_COND, and add
567 this to the predicate list of the destination of edge E. LOOP is
568 the loop to be if-converted. */
570 static void
571 add_to_dst_predicate_list (class loop *loop, edge e,
572 tree prev_cond, tree cond)
574 if (!flow_bb_inside_loop_p (loop, e->dest))
575 return;
577 if (!is_true_predicate (prev_cond))
578 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
579 prev_cond, cond);
581 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
582 add_to_predicate_list (loop, e->dest, cond);
585 /* Return true if one of the successor edges of BB exits LOOP. */
587 static bool
588 bb_with_exit_edge_p (class loop *loop, basic_block bb)
590 edge e;
591 edge_iterator ei;
593 FOR_EACH_EDGE (e, ei, bb->succs)
594 if (loop_exit_edge_p (loop, e))
595 return true;
597 return false;
600 /* Given PHI which has more than two arguments, this function checks if
601 it's if-convertible by degenerating its arguments. Specifically, if
602 below two conditions are satisfied:
604 1) Number of PHI arguments with different values equals to 2 and one
605 argument has the only occurrence.
606 2) The edge corresponding to the unique argument isn't critical edge.
608 Such PHI can be handled as PHIs have only two arguments. For example,
609 below PHI:
611 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
613 can be transformed into:
615 res = (predicate of e3) ? A_2 : A_1;
617 Return TRUE if it is the case, FALSE otherwise. */
619 static bool
620 phi_convertible_by_degenerating_args (gphi *phi)
622 edge e;
623 tree arg, t1 = NULL, t2 = NULL;
624 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
625 unsigned int num_args = gimple_phi_num_args (phi);
627 gcc_assert (num_args > 2);
629 for (i = 0; i < num_args; i++)
631 arg = gimple_phi_arg_def (phi, i);
632 if (t1 == NULL || operand_equal_p (t1, arg, 0))
634 n1++;
635 i1 = i;
636 t1 = arg;
638 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
640 n2++;
641 i2 = i;
642 t2 = arg;
644 else
645 return false;
648 if (n1 != 1 && n2 != 1)
649 return false;
651 /* Check if the edge corresponding to the unique arg is critical. */
652 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
653 if (EDGE_COUNT (e->src->succs) > 1)
654 return false;
656 return true;
659 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
660 and it belongs to basic block BB. Note at this point, it is sure
661 that PHI is if-convertible. This function updates global variable
662 ANY_COMPLICATED_PHI if PHI is complicated. */
664 static bool
665 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
667 if (dump_file && (dump_flags & TDF_DETAILS))
669 fprintf (dump_file, "-------------------------\n");
670 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
673 if (bb != loop->header
674 && gimple_phi_num_args (phi) > 2
675 && !phi_convertible_by_degenerating_args (phi))
676 any_complicated_phi = true;
678 return true;
681 /* Records the status of a data reference. This struct is attached to
682 each DR->aux field. */
684 struct ifc_dr {
685 bool rw_unconditionally;
686 bool w_unconditionally;
687 bool written_at_least_once;
689 tree rw_predicate;
690 tree w_predicate;
691 tree base_w_predicate;
694 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
695 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
696 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
697 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
699 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
700 HASH tables. While storing them in HASH table, it checks if the
701 reference is unconditionally read or written and stores that as a flag
702 information. For base reference it checks if it is written atlest once
703 unconditionally and stores it as flag information along with DR.
704 In other words for every data reference A in STMT there exist other
705 accesses to a data reference with the same base with predicates that
706 add up (OR-up) to the true predicate: this ensures that the data
707 reference A is touched (read or written) on every iteration of the
708 if-converted loop. */
709 static void
710 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
713 data_reference_p *master_dr, *base_master_dr;
714 tree base_ref = DR_BASE_OBJECT (a);
715 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
716 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
717 bool exist1, exist2;
719 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
720 if (!exist1)
721 *master_dr = a;
723 if (DR_IS_WRITE (a))
725 IFC_DR (*master_dr)->w_predicate
726 = fold_or_predicates (UNKNOWN_LOCATION, ca,
727 IFC_DR (*master_dr)->w_predicate);
728 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
729 DR_W_UNCONDITIONALLY (*master_dr) = true;
731 IFC_DR (*master_dr)->rw_predicate
732 = fold_or_predicates (UNKNOWN_LOCATION, ca,
733 IFC_DR (*master_dr)->rw_predicate);
734 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
735 DR_RW_UNCONDITIONALLY (*master_dr) = true;
737 if (DR_IS_WRITE (a))
739 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
740 if (!exist2)
741 *base_master_dr = a;
742 IFC_DR (*base_master_dr)->base_w_predicate
743 = fold_or_predicates (UNKNOWN_LOCATION, ca,
744 IFC_DR (*base_master_dr)->base_w_predicate);
745 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
746 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
750 /* Return TRUE if can prove the index IDX of an array reference REF is
751 within array bound. Return false otherwise. */
753 static bool
754 idx_within_array_bound (tree ref, tree *idx, void *dta)
756 wi::overflow_type overflow;
757 widest_int niter, valid_niter, delta, wi_step;
758 tree ev, init, step;
759 tree low, high;
760 class loop *loop = (class loop*) dta;
762 /* Only support within-bound access for array references. */
763 if (TREE_CODE (ref) != ARRAY_REF)
764 return false;
766 /* For arrays that might have flexible sizes, it is not guaranteed that they
767 do not extend over their declared size. */
768 if (array_ref_flexible_size_p (ref))
769 return false;
771 ev = analyze_scalar_evolution (loop, *idx);
772 ev = instantiate_parameters (loop, ev);
773 init = initial_condition (ev);
774 step = evolution_part_in_loop_num (ev, loop->num);
776 if (!init || TREE_CODE (init) != INTEGER_CST
777 || (step && TREE_CODE (step) != INTEGER_CST))
778 return false;
780 low = array_ref_low_bound (ref);
781 high = array_ref_up_bound (ref);
783 /* The case of nonconstant bounds could be handled, but it would be
784 complicated. */
785 if (TREE_CODE (low) != INTEGER_CST
786 || !high || TREE_CODE (high) != INTEGER_CST)
787 return false;
789 /* Check if the intial idx is within bound. */
790 if (wi::to_widest (init) < wi::to_widest (low)
791 || wi::to_widest (init) > wi::to_widest (high))
792 return false;
794 /* The idx is always within bound. */
795 if (!step || integer_zerop (step))
796 return true;
798 if (!max_loop_iterations (loop, &niter))
799 return false;
801 if (wi::to_widest (step) < 0)
803 delta = wi::to_widest (init) - wi::to_widest (low);
804 wi_step = -wi::to_widest (step);
806 else
808 delta = wi::to_widest (high) - wi::to_widest (init);
809 wi_step = wi::to_widest (step);
812 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
813 /* The iteration space of idx is within array bound. */
814 if (!overflow && niter <= valid_niter)
815 return true;
817 return false;
820 /* Return TRUE if ref is a within bound array reference. */
822 static bool
823 ref_within_array_bound (gimple *stmt, tree ref)
825 class loop *loop = loop_containing_stmt (stmt);
827 gcc_assert (loop != NULL);
828 return for_each_index (&ref, idx_within_array_bound, loop);
832 /* Given a memory reference expression T, return TRUE if base object
833 it refers to is writable. The base object of a memory reference
834 is the main object being referenced, which is returned by function
835 get_base_address. */
837 static bool
838 base_object_writable (tree ref)
840 tree base_tree = get_base_address (ref);
842 return (base_tree
843 && DECL_P (base_tree)
844 && decl_binds_to_current_def_p (base_tree)
845 && !TREE_READONLY (base_tree));
848 /* Return true when the memory references of STMT won't trap in the
849 if-converted code. There are two things that we have to check for:
851 - writes to memory occur to writable memory: if-conversion of
852 memory writes transforms the conditional memory writes into
853 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
854 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
855 be executed at all in the original code, it may be a readonly
856 memory. To check that A is not const-qualified, we check that
857 there exists at least an unconditional write to A in the current
858 function.
860 - reads or writes to memory are valid memory accesses for every
861 iteration. To check that the memory accesses are correctly formed
862 and that we are allowed to read and write in these locations, we
863 check that the memory accesses to be if-converted occur at every
864 iteration unconditionally.
866 Returns true for the memory reference in STMT, same memory reference
867 is read or written unconditionally atleast once and the base memory
868 reference is written unconditionally once. This is to check reference
869 will not write fault. Also retuns true if the memory reference is
870 unconditionally read once then we are conditionally writing to memory
871 which is defined as read and write and is bound to the definition
872 we are seeing. */
873 static bool
874 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
876 /* If DR didn't see a reference here we can't use it to tell
877 whether the ref traps or not. */
878 if (gimple_uid (stmt) == 0)
879 return false;
881 data_reference_p *master_dr, *base_master_dr;
882 data_reference_p a = drs[gimple_uid (stmt) - 1];
884 tree base = DR_BASE_OBJECT (a);
885 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
887 gcc_assert (DR_STMT (a) == stmt);
888 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
889 || DR_INIT (a) || DR_STEP (a));
891 master_dr = innermost_DR_map->get (innermost);
892 gcc_assert (master_dr != NULL);
894 base_master_dr = baseref_DR_map->get (base);
896 /* If a is unconditionally written to it doesn't trap. */
897 if (DR_W_UNCONDITIONALLY (*master_dr))
898 return true;
900 /* If a is unconditionally accessed then ...
902 Even a is conditional access, we can treat it as an unconditional
903 one if it's an array reference and all its index are within array
904 bound. */
905 if (DR_RW_UNCONDITIONALLY (*master_dr)
906 || ref_within_array_bound (stmt, DR_REF (a)))
908 /* an unconditional read won't trap. */
909 if (DR_IS_READ (a))
910 return true;
912 /* an unconditionaly write won't trap if the base is written
913 to unconditionally. */
914 if (base_master_dr
915 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
916 return flag_store_data_races;
917 /* or the base is known to be not readonly. */
918 else if (base_object_writable (DR_REF (a)))
919 return flag_store_data_races;
922 return false;
925 /* Return true if STMT could be converted into a masked load or store
926 (conditional load or store based on a mask computed from bb predicate). */
928 static bool
929 ifcvt_can_use_mask_load_store (gimple *stmt)
931 /* Check whether this is a load or store. */
932 tree lhs = gimple_assign_lhs (stmt);
933 bool is_load;
934 tree ref;
935 if (gimple_store_p (stmt))
937 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
938 return false;
939 is_load = false;
940 ref = lhs;
942 else if (gimple_assign_load_p (stmt))
944 is_load = true;
945 ref = gimple_assign_rhs1 (stmt);
947 else
948 return false;
950 if (may_be_nonaddressable_p (ref))
951 return false;
953 /* Mask should be integer mode of the same size as the load/store
954 mode. */
955 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
956 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
957 return false;
959 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
960 return true;
962 return false;
965 /* Return true if STMT could be converted from an operation that is
966 unconditional to one that is conditional on a bb predicate mask. */
968 static bool
969 ifcvt_can_predicate (gimple *stmt)
971 basic_block bb = gimple_bb (stmt);
973 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
974 || bb->loop_father->dont_vectorize
975 || gimple_has_volatile_ops (stmt))
976 return false;
978 if (gimple_assign_single_p (stmt))
979 return ifcvt_can_use_mask_load_store (stmt);
981 tree_code code = gimple_assign_rhs_code (stmt);
982 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
983 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
984 if (!types_compatible_p (lhs_type, rhs_type))
985 return false;
986 internal_fn cond_fn = get_conditional_internal_fn (code);
987 return (cond_fn != IFN_LAST
988 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
991 /* Return true when STMT is if-convertible.
993 GIMPLE_ASSIGN statement is not if-convertible if,
994 - it is not movable,
995 - it could trap,
996 - LHS is not var decl. */
998 static bool
999 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1000 vec<data_reference_p> refs)
1002 tree lhs = gimple_assign_lhs (stmt);
1004 if (dump_file && (dump_flags & TDF_DETAILS))
1006 fprintf (dump_file, "-------------------------\n");
1007 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1010 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1011 return false;
1013 /* Some of these constrains might be too conservative. */
1014 if (stmt_ends_bb_p (stmt)
1015 || gimple_has_volatile_ops (stmt)
1016 || (TREE_CODE (lhs) == SSA_NAME
1017 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1018 || gimple_has_side_effects (stmt))
1020 if (dump_file && (dump_flags & TDF_DETAILS))
1021 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1022 return false;
1025 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1026 in between if_convertible_loop_p and combine_blocks
1027 we can perform loop versioning. */
1028 gimple_set_plf (stmt, GF_PLF_2, false);
1030 if ((! gimple_vuse (stmt)
1031 || gimple_could_trap_p_1 (stmt, false, false)
1032 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1033 && gimple_could_trap_p (stmt))
1035 if (ifcvt_can_predicate (stmt))
1037 gimple_set_plf (stmt, GF_PLF_2, true);
1038 need_to_predicate = true;
1039 return true;
1041 if (dump_file && (dump_flags & TDF_DETAILS))
1042 fprintf (dump_file, "tree could trap...\n");
1043 return false;
1045 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1046 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1047 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1048 && arith_code_with_undefined_signed_overflow
1049 (gimple_assign_rhs_code (stmt)))
1050 /* We have to rewrite stmts with undefined overflow. */
1051 need_to_rewrite_undefined = true;
1053 /* When if-converting stores force versioning, likewise if we
1054 ended up generating store data races. */
1055 if (gimple_vdef (stmt))
1056 need_to_predicate = true;
1058 return true;
1061 /* Return true when STMT is if-convertible.
1063 A statement is if-convertible if:
1064 - it is an if-convertible GIMPLE_ASSIGN,
1065 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1066 - it is builtins call. */
1068 static bool
1069 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1071 switch (gimple_code (stmt))
1073 case GIMPLE_LABEL:
1074 case GIMPLE_DEBUG:
1075 case GIMPLE_COND:
1076 return true;
1078 case GIMPLE_ASSIGN:
1079 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1081 case GIMPLE_CALL:
1083 tree fndecl = gimple_call_fndecl (stmt);
1084 if (fndecl)
1086 int flags = gimple_call_flags (stmt);
1087 if ((flags & ECF_CONST)
1088 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1089 /* We can only vectorize some builtins at the moment,
1090 so restrict if-conversion to those. */
1091 && fndecl_built_in_p (fndecl))
1092 return true;
1094 return false;
1097 default:
1098 /* Don't know what to do with 'em so don't do anything. */
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1101 fprintf (dump_file, "don't know what to do\n");
1102 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1104 return false;
1108 /* Assumes that BB has more than 1 predecessors.
1109 Returns false if at least one successor is not on critical edge
1110 and true otherwise. */
1112 static inline bool
1113 all_preds_critical_p (basic_block bb)
1115 edge e;
1116 edge_iterator ei;
1118 FOR_EACH_EDGE (e, ei, bb->preds)
1119 if (EDGE_COUNT (e->src->succs) == 1)
1120 return false;
1121 return true;
1124 /* Return true when BB is if-convertible. This routine does not check
1125 basic block's statements and phis.
1127 A basic block is not if-convertible if:
1128 - it is non-empty and it is after the exit block (in BFS order),
1129 - it is after the exit block but before the latch,
1130 - its edges are not normal.
1132 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1133 inside LOOP. */
1135 static bool
1136 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1138 edge e;
1139 edge_iterator ei;
1141 if (dump_file && (dump_flags & TDF_DETAILS))
1142 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1144 if (EDGE_COUNT (bb->succs) > 2)
1145 return false;
1147 gimple *last = last_stmt (bb);
1148 if (gcall *call = safe_dyn_cast <gcall *> (last))
1149 if (gimple_call_ctrl_altering_p (call))
1150 return false;
1152 if (exit_bb)
1154 if (bb != loop->latch)
1156 if (dump_file && (dump_flags & TDF_DETAILS))
1157 fprintf (dump_file, "basic block after exit bb but before latch\n");
1158 return false;
1160 else if (!empty_block_p (bb))
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 fprintf (dump_file, "non empty basic block after exit bb\n");
1164 return false;
1166 else if (bb == loop->latch
1167 && bb != exit_bb
1168 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1170 if (dump_file && (dump_flags & TDF_DETAILS))
1171 fprintf (dump_file, "latch is not dominated by exit_block\n");
1172 return false;
1176 /* Be less adventurous and handle only normal edges. */
1177 FOR_EACH_EDGE (e, ei, bb->succs)
1178 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1180 if (dump_file && (dump_flags & TDF_DETAILS))
1181 fprintf (dump_file, "Difficult to handle edges\n");
1182 return false;
1185 return true;
1188 /* Return true when all predecessor blocks of BB are visited. The
1189 VISITED bitmap keeps track of the visited blocks. */
1191 static bool
1192 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1194 edge e;
1195 edge_iterator ei;
1196 FOR_EACH_EDGE (e, ei, bb->preds)
1197 if (!bitmap_bit_p (*visited, e->src->index))
1198 return false;
1200 return true;
1203 /* Get body of a LOOP in suitable order for if-conversion. It is
1204 caller's responsibility to deallocate basic block list.
1205 If-conversion suitable order is, breadth first sort (BFS) order
1206 with an additional constraint: select a block only if all its
1207 predecessors are already selected. */
1209 static basic_block *
1210 get_loop_body_in_if_conv_order (const class loop *loop)
1212 basic_block *blocks, *blocks_in_bfs_order;
1213 basic_block bb;
1214 bitmap visited;
1215 unsigned int index = 0;
1216 unsigned int visited_count = 0;
1218 gcc_assert (loop->num_nodes);
1219 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1221 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1222 visited = BITMAP_ALLOC (NULL);
1224 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1226 index = 0;
1227 while (index < loop->num_nodes)
1229 bb = blocks_in_bfs_order [index];
1231 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1233 free (blocks_in_bfs_order);
1234 BITMAP_FREE (visited);
1235 free (blocks);
1236 return NULL;
1239 if (!bitmap_bit_p (visited, bb->index))
1241 if (pred_blocks_visited_p (bb, &visited)
1242 || bb == loop->header)
1244 /* This block is now visited. */
1245 bitmap_set_bit (visited, bb->index);
1246 blocks[visited_count++] = bb;
1250 index++;
1252 if (index == loop->num_nodes
1253 && visited_count != loop->num_nodes)
1254 /* Not done yet. */
1255 index = 0;
1257 free (blocks_in_bfs_order);
1258 BITMAP_FREE (visited);
1259 return blocks;
1262 /* Returns true when the analysis of the predicates for all the basic
1263 blocks in LOOP succeeded.
1265 predicate_bbs first allocates the predicates of the basic blocks.
1266 These fields are then initialized with the tree expressions
1267 representing the predicates under which a basic block is executed
1268 in the LOOP. As the loop->header is executed at each iteration, it
1269 has the "true" predicate. Other statements executed under a
1270 condition are predicated with that condition, for example
1272 | if (x)
1273 | S1;
1274 | else
1275 | S2;
1277 S1 will be predicated with "x", and
1278 S2 will be predicated with "!x". */
1280 static void
1281 predicate_bbs (loop_p loop)
1283 unsigned int i;
1285 for (i = 0; i < loop->num_nodes; i++)
1286 init_bb_predicate (ifc_bbs[i]);
1288 for (i = 0; i < loop->num_nodes; i++)
1290 basic_block bb = ifc_bbs[i];
1291 tree cond;
1292 gimple *stmt;
1294 /* The loop latch and loop exit block are always executed and
1295 have no extra conditions to be processed: skip them. */
1296 if (bb == loop->latch
1297 || bb_with_exit_edge_p (loop, bb))
1299 reset_bb_predicate (bb);
1300 continue;
1303 cond = bb_predicate (bb);
1304 stmt = last_stmt (bb);
1305 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1307 tree c2;
1308 edge true_edge, false_edge;
1309 location_t loc = gimple_location (stmt);
1310 tree c;
1311 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1312 conditions can remain unfolded because of multiple uses so
1313 try to re-fold here, especially to get precision changing
1314 conversions sorted out. Do not simply fold the stmt since
1315 this is analysis only. When conditions were embedded in
1316 COND_EXPRs those were folded separately before folding the
1317 COND_EXPR but as they are now outside we have to make sure
1318 to fold them. Do it here - another opportunity would be to
1319 fold predicates as they are inserted. */
1320 gimple_match_op cexpr (gimple_match_cond::UNCOND,
1321 gimple_cond_code (stmt),
1322 boolean_type_node,
1323 gimple_cond_lhs (stmt),
1324 gimple_cond_rhs (stmt));
1325 if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1326 && cexpr.code.is_tree_code ()
1327 && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1328 c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1329 cexpr.ops[0], cexpr.ops[1]);
1330 else
1331 c = build2_loc (loc, gimple_cond_code (stmt),
1332 boolean_type_node,
1333 gimple_cond_lhs (stmt),
1334 gimple_cond_rhs (stmt));
1336 /* Add new condition into destination's predicate list. */
1337 extract_true_false_edges_from_block (gimple_bb (stmt),
1338 &true_edge, &false_edge);
1340 /* If C is true, then TRUE_EDGE is taken. */
1341 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1342 unshare_expr (c));
1344 /* If C is false, then FALSE_EDGE is taken. */
1345 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1346 unshare_expr (c));
1347 add_to_dst_predicate_list (loop, false_edge,
1348 unshare_expr (cond), c2);
1350 cond = NULL_TREE;
1353 /* If current bb has only one successor, then consider it as an
1354 unconditional goto. */
1355 if (single_succ_p (bb))
1357 basic_block bb_n = single_succ (bb);
1359 /* The successor bb inherits the predicate of its
1360 predecessor. If there is no predicate in the predecessor
1361 bb, then consider the successor bb as always executed. */
1362 if (cond == NULL_TREE)
1363 cond = boolean_true_node;
1365 add_to_predicate_list (loop, bb_n, cond);
1369 /* The loop header is always executed. */
1370 reset_bb_predicate (loop->header);
1371 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1372 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1375 /* Build region by adding loop pre-header and post-header blocks. */
1377 static vec<basic_block>
1378 build_region (class loop *loop)
1380 vec<basic_block> region = vNULL;
1381 basic_block exit_bb = NULL;
1383 gcc_assert (ifc_bbs);
1384 /* The first element is loop pre-header. */
1385 region.safe_push (loop_preheader_edge (loop)->src);
1387 for (unsigned int i = 0; i < loop->num_nodes; i++)
1389 basic_block bb = ifc_bbs[i];
1390 region.safe_push (bb);
1391 /* Find loop postheader. */
1392 edge e;
1393 edge_iterator ei;
1394 FOR_EACH_EDGE (e, ei, bb->succs)
1395 if (loop_exit_edge_p (loop, e))
1397 exit_bb = e->dest;
1398 break;
1401 /* The last element is loop post-header. */
1402 gcc_assert (exit_bb);
1403 region.safe_push (exit_bb);
1404 return region;
1407 /* Return true when LOOP is if-convertible. This is a helper function
1408 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1409 in if_convertible_loop_p. */
1411 static bool
1412 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1414 unsigned int i;
1415 basic_block exit_bb = NULL;
1416 vec<basic_block> region;
1418 calculate_dominance_info (CDI_DOMINATORS);
1420 for (i = 0; i < loop->num_nodes; i++)
1422 basic_block bb = ifc_bbs[i];
1424 if (!if_convertible_bb_p (loop, bb, exit_bb))
1425 return false;
1427 if (bb_with_exit_edge_p (loop, bb))
1428 exit_bb = bb;
1431 for (i = 0; i < loop->num_nodes; i++)
1433 basic_block bb = ifc_bbs[i];
1434 gimple_stmt_iterator gsi;
1436 bool may_have_nonlocal_labels
1437 = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1438 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1439 switch (gimple_code (gsi_stmt (gsi)))
1441 case GIMPLE_LABEL:
1442 if (!may_have_nonlocal_labels)
1444 tree label
1445 = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1446 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1447 return false;
1449 /* Fallthru. */
1450 case GIMPLE_ASSIGN:
1451 case GIMPLE_CALL:
1452 case GIMPLE_DEBUG:
1453 case GIMPLE_COND:
1454 gimple_set_uid (gsi_stmt (gsi), 0);
1455 break;
1456 default:
1457 return false;
1461 data_reference_p dr;
1463 innermost_DR_map
1464 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1465 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1467 /* Compute post-dominator tree locally. */
1468 region = build_region (loop);
1469 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1471 predicate_bbs (loop);
1473 /* Free post-dominator tree since it is not used after predication. */
1474 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1475 region.release ();
1477 for (i = 0; refs->iterate (i, &dr); i++)
1479 tree ref = DR_REF (dr);
1481 dr->aux = XNEW (struct ifc_dr);
1482 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1483 DR_RW_UNCONDITIONALLY (dr) = false;
1484 DR_W_UNCONDITIONALLY (dr) = false;
1485 IFC_DR (dr)->rw_predicate = boolean_false_node;
1486 IFC_DR (dr)->w_predicate = boolean_false_node;
1487 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1488 if (gimple_uid (DR_STMT (dr)) == 0)
1489 gimple_set_uid (DR_STMT (dr), i + 1);
1491 /* If DR doesn't have innermost loop behavior or it's a compound
1492 memory reference, we synthesize its innermost loop behavior
1493 for hashing. */
1494 if (TREE_CODE (ref) == COMPONENT_REF
1495 || TREE_CODE (ref) == IMAGPART_EXPR
1496 || TREE_CODE (ref) == REALPART_EXPR
1497 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1498 || DR_INIT (dr) || DR_STEP (dr)))
1500 while (TREE_CODE (ref) == COMPONENT_REF
1501 || TREE_CODE (ref) == IMAGPART_EXPR
1502 || TREE_CODE (ref) == REALPART_EXPR)
1503 ref = TREE_OPERAND (ref, 0);
1505 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1506 DR_BASE_ADDRESS (dr) = ref;
1508 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1511 for (i = 0; i < loop->num_nodes; i++)
1513 basic_block bb = ifc_bbs[i];
1514 gimple_stmt_iterator itr;
1516 /* Check the if-convertibility of statements in predicated BBs. */
1517 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1518 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1519 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1520 return false;
1523 /* Checking PHIs needs to be done after stmts, as the fact whether there
1524 are any masked loads or stores affects the tests. */
1525 for (i = 0; i < loop->num_nodes; i++)
1527 basic_block bb = ifc_bbs[i];
1528 gphi_iterator itr;
1530 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1531 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1532 return false;
1535 if (dump_file)
1536 fprintf (dump_file, "Applying if-conversion\n");
1538 return true;
1541 /* Return true when LOOP is if-convertible.
1542 LOOP is if-convertible if:
1543 - it is innermost,
1544 - it has two or more basic blocks,
1545 - it has only one exit,
1546 - loop header is not the exit edge,
1547 - if its basic blocks and phi nodes are if convertible. */
1549 static bool
1550 if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1552 edge e;
1553 edge_iterator ei;
1554 bool res = false;
1556 /* Handle only innermost loop. */
1557 if (!loop || loop->inner)
1559 if (dump_file && (dump_flags & TDF_DETAILS))
1560 fprintf (dump_file, "not innermost loop\n");
1561 return false;
1564 /* If only one block, no need for if-conversion. */
1565 if (loop->num_nodes <= 2)
1567 if (dump_file && (dump_flags & TDF_DETAILS))
1568 fprintf (dump_file, "less than 2 basic blocks\n");
1569 return false;
1572 /* More than one loop exit is too much to handle. */
1573 if (!single_exit (loop))
1575 if (dump_file && (dump_flags & TDF_DETAILS))
1576 fprintf (dump_file, "multiple exits\n");
1577 return false;
1580 /* If one of the loop header's edge is an exit edge then do not
1581 apply if-conversion. */
1582 FOR_EACH_EDGE (e, ei, loop->header->succs)
1583 if (loop_exit_edge_p (loop, e))
1584 return false;
1586 res = if_convertible_loop_p_1 (loop, refs);
1588 delete innermost_DR_map;
1589 innermost_DR_map = NULL;
1591 delete baseref_DR_map;
1592 baseref_DR_map = NULL;
1594 return res;
1597 /* Return reduc_1 if has_nop.
1599 if (...)
1600 tmp1 = (unsigned type) reduc_1;
1601 tmp2 = tmp1 + rhs2;
1602 reduc_3 = (signed type) tmp2. */
1603 static tree
1604 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1606 if (!has_nop)
1607 return op;
1609 if (TREE_CODE (op) != SSA_NAME)
1610 return NULL_TREE;
1612 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1613 if (!stmt
1614 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1615 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1616 (gimple_assign_rhs1 (stmt))))
1617 return NULL_TREE;
1619 return gimple_assign_rhs1 (stmt);
1622 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1623 which is in predicated basic block.
1624 In fact, the following PHI pattern is searching:
1625 loop-header:
1626 reduc_1 = PHI <..., reduc_2>
1628 if (...)
1629 reduc_3 = ...
1630 reduc_2 = PHI <reduc_1, reduc_3>
1632 ARG_0 and ARG_1 are correspondent PHI arguments.
1633 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1634 EXTENDED is true if PHI has > 2 arguments. */
1636 static bool
1637 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1638 tree *op0, tree *op1, bool extended, bool* has_nop,
1639 gimple **nop_reduc)
1641 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1642 gimple *stmt;
1643 gimple *header_phi = NULL;
1644 enum tree_code reduction_op;
1645 basic_block bb = gimple_bb (phi);
1646 class loop *loop = bb->loop_father;
1647 edge latch_e = loop_latch_edge (loop);
1648 imm_use_iterator imm_iter;
1649 use_operand_p use_p;
1650 edge e;
1651 edge_iterator ei;
1652 bool result = *has_nop = false;
1653 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1654 return false;
1656 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1658 lhs = arg_1;
1659 header_phi = SSA_NAME_DEF_STMT (arg_0);
1660 stmt = SSA_NAME_DEF_STMT (arg_1);
1662 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1664 lhs = arg_0;
1665 header_phi = SSA_NAME_DEF_STMT (arg_1);
1666 stmt = SSA_NAME_DEF_STMT (arg_0);
1668 else
1669 return false;
1670 if (gimple_bb (header_phi) != loop->header)
1671 return false;
1673 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1674 return false;
1676 if (gimple_code (stmt) != GIMPLE_ASSIGN
1677 || gimple_has_volatile_ops (stmt))
1678 return false;
1680 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1681 return false;
1683 if (!is_predicated (gimple_bb (stmt)))
1684 return false;
1686 /* Check that stmt-block is predecessor of phi-block. */
1687 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1688 if (e->dest == bb)
1690 result = true;
1691 break;
1693 if (!result)
1694 return false;
1696 if (!has_single_use (lhs))
1697 return false;
1699 reduction_op = gimple_assign_rhs_code (stmt);
1701 /* Catch something like below
1703 loop-header:
1704 reduc_1 = PHI <..., reduc_2>
1706 if (...)
1707 tmp1 = (unsigned type) reduc_1;
1708 tmp2 = tmp1 + rhs2;
1709 reduc_3 = (signed type) tmp2;
1711 reduc_2 = PHI <reduc_1, reduc_3>
1713 and convert to
1715 reduc_2 = PHI <0, reduc_3>
1716 tmp1 = (unsigned type)reduce_1;
1717 ifcvt = cond_expr ? rhs2 : 0
1718 tmp2 = tmp1 +/- ifcvt;
1719 reduce_1 = (signed type)tmp2; */
1721 if (CONVERT_EXPR_CODE_P (reduction_op))
1723 lhs = gimple_assign_rhs1 (stmt);
1724 if (TREE_CODE (lhs) != SSA_NAME
1725 || !has_single_use (lhs))
1726 return false;
1728 *nop_reduc = stmt;
1729 stmt = SSA_NAME_DEF_STMT (lhs);
1730 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1731 || !is_gimple_assign (stmt))
1732 return false;
1734 *has_nop = true;
1735 reduction_op = gimple_assign_rhs_code (stmt);
1738 if (reduction_op != PLUS_EXPR
1739 && reduction_op != MINUS_EXPR
1740 && reduction_op != MULT_EXPR
1741 && reduction_op != BIT_IOR_EXPR
1742 && reduction_op != BIT_XOR_EXPR
1743 && reduction_op != BIT_AND_EXPR)
1744 return false;
1745 r_op1 = gimple_assign_rhs1 (stmt);
1746 r_op2 = gimple_assign_rhs2 (stmt);
1748 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1749 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1751 /* Make R_OP1 to hold reduction variable. */
1752 if (r_nop2 == PHI_RESULT (header_phi)
1753 && commutative_tree_code (reduction_op))
1755 std::swap (r_op1, r_op2);
1756 std::swap (r_nop1, r_nop2);
1758 else if (r_nop1 != PHI_RESULT (header_phi))
1759 return false;
1761 if (*has_nop)
1763 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1764 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1766 gimple *use_stmt = USE_STMT (use_p);
1767 if (is_gimple_debug (use_stmt))
1768 continue;
1769 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1770 continue;
1771 if (use_stmt != phi)
1772 return false;
1776 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1777 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1779 gimple *use_stmt = USE_STMT (use_p);
1780 if (is_gimple_debug (use_stmt))
1781 continue;
1782 if (use_stmt == stmt)
1783 continue;
1784 if (gimple_code (use_stmt) != GIMPLE_PHI)
1785 return false;
1788 *op0 = r_op1; *op1 = r_op2;
1789 *reduc = stmt;
1790 return true;
1793 /* Converts conditional scalar reduction into unconditional form, e.g.
1794 bb_4
1795 if (_5 != 0) goto bb_5 else goto bb_6
1796 end_bb_4
1797 bb_5
1798 res_6 = res_13 + 1;
1799 end_bb_5
1800 bb_6
1801 # res_2 = PHI <res_13(4), res_6(5)>
1802 end_bb_6
1804 will be converted into sequence
1805 _ifc__1 = _5 != 0 ? 1 : 0;
1806 res_2 = res_13 + _ifc__1;
1807 Argument SWAP tells that arguments of conditional expression should be
1808 swapped.
1809 Returns rhs of resulting PHI assignment. */
1811 static tree
1812 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1813 tree cond, tree op0, tree op1, bool swap,
1814 bool has_nop, gimple* nop_reduc)
1816 gimple_stmt_iterator stmt_it;
1817 gimple *new_assign;
1818 tree rhs;
1819 tree rhs1 = gimple_assign_rhs1 (reduc);
1820 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1821 tree c;
1822 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
1823 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1824 gimple_seq stmts = NULL;
1826 if (dump_file && (dump_flags & TDF_DETAILS))
1828 fprintf (dump_file, "Found cond scalar reduction.\n");
1829 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1832 /* Build cond expression using COND and constant operand
1833 of reduction rhs. */
1834 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1835 unshare_expr (cond),
1836 swap ? op_nochange : op1,
1837 swap ? op1 : op_nochange);
1839 /* Create assignment stmt and insert it at GSI. */
1840 new_assign = gimple_build_assign (tmp, c);
1841 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1842 /* Build rhs for unconditional increment/decrement/logic_operation. */
1843 rhs = gimple_build (&stmts, reduction_op,
1844 TREE_TYPE (rhs1), op0, tmp);
1846 if (has_nop)
1848 rhs = gimple_convert (&stmts,
1849 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1850 stmt_it = gsi_for_stmt (nop_reduc);
1851 gsi_remove (&stmt_it, true);
1852 release_defs (nop_reduc);
1854 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1856 /* Delete original reduction stmt. */
1857 stmt_it = gsi_for_stmt (reduc);
1858 gsi_remove (&stmt_it, true);
1859 release_defs (reduc);
1860 return rhs;
1863 /* Produce condition for all occurrences of ARG in PHI node. */
1865 static tree
1866 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1867 gimple_stmt_iterator *gsi)
1869 int len;
1870 int i;
1871 tree cond = NULL_TREE;
1872 tree c;
1873 edge e;
1875 len = occur->length ();
1876 gcc_assert (len > 0);
1877 for (i = 0; i < len; i++)
1879 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1880 c = bb_predicate (e->src);
1881 if (is_true_predicate (c))
1883 cond = c;
1884 break;
1886 c = force_gimple_operand_gsi (gsi, unshare_expr (c),
1887 true, NULL_TREE, true, GSI_SAME_STMT);
1888 if (cond != NULL_TREE)
1890 /* Must build OR expression. */
1891 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1892 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1893 NULL_TREE, true, GSI_SAME_STMT);
1895 else
1896 cond = c;
1898 gcc_assert (cond != NULL_TREE);
1899 return cond;
1902 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1903 This routine can handle PHI nodes with more than two arguments.
1905 For example,
1906 S1: A = PHI <x1(1), x2(5)>
1907 is converted into,
1908 S2: A = cond ? x1 : x2;
1910 The generated code is inserted at GSI that points to the top of
1911 basic block's statement list.
1912 If PHI node has more than two arguments a chain of conditional
1913 expression is produced. */
1916 static void
1917 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1919 gimple *new_stmt = NULL, *reduc, *nop_reduc;
1920 tree rhs, res, arg0, arg1, op0, op1, scev;
1921 tree cond;
1922 unsigned int index0;
1923 unsigned int max, args_len;
1924 edge e;
1925 basic_block bb;
1926 unsigned int i;
1927 bool has_nop;
1929 res = gimple_phi_result (phi);
1930 if (virtual_operand_p (res))
1931 return;
1933 if ((rhs = degenerate_phi_result (phi))
1934 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1935 res))
1936 && !chrec_contains_undetermined (scev)
1937 && scev != res
1938 && (rhs = gimple_phi_arg_def (phi, 0))))
1940 if (dump_file && (dump_flags & TDF_DETAILS))
1942 fprintf (dump_file, "Degenerate phi!\n");
1943 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1945 new_stmt = gimple_build_assign (res, rhs);
1946 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1947 update_stmt (new_stmt);
1948 return;
1951 bb = gimple_bb (phi);
1952 if (EDGE_COUNT (bb->preds) == 2)
1954 /* Predicate ordinary PHI node with 2 arguments. */
1955 edge first_edge, second_edge;
1956 basic_block true_bb;
1957 first_edge = EDGE_PRED (bb, 0);
1958 second_edge = EDGE_PRED (bb, 1);
1959 cond = bb_predicate (first_edge->src);
1960 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1961 std::swap (first_edge, second_edge);
1962 if (EDGE_COUNT (first_edge->src->succs) > 1)
1964 cond = bb_predicate (second_edge->src);
1965 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1966 cond = TREE_OPERAND (cond, 0);
1967 else
1968 first_edge = second_edge;
1970 else
1971 cond = bb_predicate (first_edge->src);
1972 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1973 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1974 NULL_TREE, true, GSI_SAME_STMT);
1975 true_bb = first_edge->src;
1976 if (EDGE_PRED (bb, 1)->src == true_bb)
1978 arg0 = gimple_phi_arg_def (phi, 1);
1979 arg1 = gimple_phi_arg_def (phi, 0);
1981 else
1983 arg0 = gimple_phi_arg_def (phi, 0);
1984 arg1 = gimple_phi_arg_def (phi, 1);
1986 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1987 &op0, &op1, false, &has_nop,
1988 &nop_reduc))
1990 /* Convert reduction stmt into vectorizable form. */
1991 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1992 true_bb != gimple_bb (reduc),
1993 has_nop, nop_reduc);
1994 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
1996 else
1997 /* Build new RHS using selected condition and arguments. */
1998 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1999 arg0, arg1);
2000 new_stmt = gimple_build_assign (res, rhs);
2001 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2002 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2003 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2005 new_stmt = gsi_stmt (new_gsi);
2006 update_stmt (new_stmt);
2009 if (dump_file && (dump_flags & TDF_DETAILS))
2011 fprintf (dump_file, "new phi replacement stmt\n");
2012 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2014 return;
2017 /* Create hashmap for PHI node which contain vector of argument indexes
2018 having the same value. */
2019 bool swap = false;
2020 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2021 unsigned int num_args = gimple_phi_num_args (phi);
2022 int max_ind = -1;
2023 /* Vector of different PHI argument values. */
2024 auto_vec<tree> args (num_args);
2026 /* Compute phi_arg_map. */
2027 for (i = 0; i < num_args; i++)
2029 tree arg;
2031 arg = gimple_phi_arg_def (phi, i);
2032 if (!phi_arg_map.get (arg))
2033 args.quick_push (arg);
2034 phi_arg_map.get_or_insert (arg).safe_push (i);
2037 /* Determine element with max number of occurrences. */
2038 max_ind = -1;
2039 max = 1;
2040 args_len = args.length ();
2041 for (i = 0; i < args_len; i++)
2043 unsigned int len;
2044 if ((len = phi_arg_map.get (args[i])->length ()) > max)
2046 max_ind = (int) i;
2047 max = len;
2051 /* Put element with max number of occurences to the end of ARGS. */
2052 if (max_ind != -1 && max_ind +1 != (int) args_len)
2053 std::swap (args[args_len - 1], args[max_ind]);
2055 /* Handle one special case when number of arguments with different values
2056 is equal 2 and one argument has the only occurrence. Such PHI can be
2057 handled as if would have only 2 arguments. */
2058 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
2060 vec<int> *indexes;
2061 indexes = phi_arg_map.get (args[0]);
2062 index0 = (*indexes)[0];
2063 arg0 = args[0];
2064 arg1 = args[1];
2065 e = gimple_phi_arg_edge (phi, index0);
2066 cond = bb_predicate (e->src);
2067 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2069 swap = true;
2070 cond = TREE_OPERAND (cond, 0);
2072 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2073 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2074 NULL_TREE, true, GSI_SAME_STMT);
2075 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2076 &op0, &op1, true, &has_nop, &nop_reduc)))
2077 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2078 swap? arg1 : arg0,
2079 swap? arg0 : arg1);
2080 else
2082 /* Convert reduction stmt into vectorizable form. */
2083 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2084 swap,has_nop, nop_reduc);
2085 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2087 new_stmt = gimple_build_assign (res, rhs);
2088 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2089 update_stmt (new_stmt);
2091 else
2093 /* Common case. */
2094 vec<int> *indexes;
2095 tree type = TREE_TYPE (gimple_phi_result (phi));
2096 tree lhs;
2097 arg1 = args[1];
2098 for (i = 0; i < args_len; i++)
2100 arg0 = args[i];
2101 indexes = phi_arg_map.get (args[i]);
2102 if (i != args_len - 1)
2103 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2104 else
2105 lhs = res;
2106 cond = gen_phi_arg_condition (phi, indexes, gsi);
2107 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2108 arg0, arg1);
2109 new_stmt = gimple_build_assign (lhs, rhs);
2110 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2111 update_stmt (new_stmt);
2112 arg1 = lhs;
2116 if (dump_file && (dump_flags & TDF_DETAILS))
2118 fprintf (dump_file, "new extended phi replacement stmt\n");
2119 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2123 /* Replaces in LOOP all the scalar phi nodes other than those in the
2124 LOOP->header block with conditional modify expressions. */
2126 static void
2127 predicate_all_scalar_phis (class loop *loop)
2129 basic_block bb;
2130 unsigned int orig_loop_num_nodes = loop->num_nodes;
2131 unsigned int i;
2133 for (i = 1; i < orig_loop_num_nodes; i++)
2135 gphi *phi;
2136 gimple_stmt_iterator gsi;
2137 gphi_iterator phi_gsi;
2138 bb = ifc_bbs[i];
2140 if (bb == loop->header)
2141 continue;
2143 phi_gsi = gsi_start_phis (bb);
2144 if (gsi_end_p (phi_gsi))
2145 continue;
2147 gsi = gsi_after_labels (bb);
2148 while (!gsi_end_p (phi_gsi))
2150 phi = phi_gsi.phi ();
2151 if (virtual_operand_p (gimple_phi_result (phi)))
2152 gsi_next (&phi_gsi);
2153 else
2155 predicate_scalar_phi (phi, &gsi);
2156 remove_phi_node (&phi_gsi, false);
2162 /* Insert in each basic block of LOOP the statements produced by the
2163 gimplification of the predicates. */
2165 static void
2166 insert_gimplified_predicates (loop_p loop)
2168 unsigned int i;
2170 for (i = 0; i < loop->num_nodes; i++)
2172 basic_block bb = ifc_bbs[i];
2173 gimple_seq stmts;
2174 if (!is_predicated (bb))
2175 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2176 if (!is_predicated (bb))
2178 /* Do not insert statements for a basic block that is not
2179 predicated. Also make sure that the predicate of the
2180 basic block is set to true. */
2181 reset_bb_predicate (bb);
2182 continue;
2185 stmts = bb_predicate_gimplified_stmts (bb);
2186 if (stmts)
2188 if (need_to_predicate)
2190 /* Insert the predicate of the BB just after the label,
2191 as the if-conversion of memory writes will use this
2192 predicate. */
2193 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2194 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2196 else
2198 /* Insert the predicate of the BB at the end of the BB
2199 as this would reduce the register pressure: the only
2200 use of this predicate will be in successor BBs. */
2201 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2203 if (gsi_end_p (gsi)
2204 || stmt_ends_bb_p (gsi_stmt (gsi)))
2205 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2206 else
2207 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2210 /* Once the sequence is code generated, set it to NULL. */
2211 set_bb_predicate_gimplified_stmts (bb, NULL);
2216 /* Helper function for predicate_statements. Returns index of existent
2217 mask if it was created for given SIZE and -1 otherwise. */
2219 static int
2220 mask_exists (int size, const vec<int> &vec)
2222 unsigned int ix;
2223 int v;
2224 FOR_EACH_VEC_ELT (vec, ix, v)
2225 if (v == size)
2226 return (int) ix;
2227 return -1;
2230 /* Helper function for predicate_statements. STMT is a memory read or
2231 write and it needs to be predicated by MASK. Return a statement
2232 that does so. */
2234 static gimple *
2235 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2237 gcall *new_stmt;
2239 tree lhs = gimple_assign_lhs (stmt);
2240 tree rhs = gimple_assign_rhs1 (stmt);
2241 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2242 mark_addressable (ref);
2243 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2244 true, NULL_TREE, true, GSI_SAME_STMT);
2245 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2246 get_object_alignment (ref));
2247 /* Copy points-to info if possible. */
2248 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2249 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2250 ref);
2251 if (TREE_CODE (lhs) == SSA_NAME)
2253 new_stmt
2254 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2255 ptr, mask);
2256 gimple_call_set_lhs (new_stmt, lhs);
2257 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2259 else
2261 new_stmt
2262 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2263 mask, rhs);
2264 gimple_move_vops (new_stmt, stmt);
2266 gimple_call_set_nothrow (new_stmt, true);
2267 return new_stmt;
2270 /* STMT uses OP_LHS. Check whether it is equivalent to:
2272 ... = OP_MASK ? OP_LHS : X;
2274 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2275 known to have value OP_COND. */
2277 static tree
2278 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2279 tree op_lhs)
2281 gassign *assign = dyn_cast <gassign *> (stmt);
2282 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2283 return NULL_TREE;
2285 tree use_cond = gimple_assign_rhs1 (assign);
2286 tree if_true = gimple_assign_rhs2 (assign);
2287 tree if_false = gimple_assign_rhs3 (assign);
2289 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2290 && if_true == op_lhs)
2291 return if_false;
2293 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2294 return if_true;
2296 return NULL_TREE;
2299 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2300 the set of SSA names defined earlier in STMT's block. */
2302 static bool
2303 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2304 tree value)
2306 if (is_gimple_min_invariant (value))
2307 return true;
2309 if (TREE_CODE (value) == SSA_NAME)
2311 if (SSA_NAME_IS_DEFAULT_DEF (value))
2312 return true;
2314 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2315 basic_block use_bb = gimple_bb (stmt);
2316 return (def_bb == use_bb
2317 ? ssa_names->contains (value)
2318 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2321 return false;
2324 /* Helper function for predicate_statements. STMT is a potentially-trapping
2325 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2326 has value COND. Return a statement that does so. SSA_NAMES is the set of
2327 SSA names defined earlier in STMT's block. */
2329 static gimple *
2330 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2331 hash_set<tree_ssa_name_hash> *ssa_names)
2333 tree lhs = gimple_assign_lhs (stmt);
2334 tree_code code = gimple_assign_rhs_code (stmt);
2335 unsigned int nops = gimple_num_ops (stmt);
2336 internal_fn cond_fn = get_conditional_internal_fn (code);
2338 /* Construct the arguments to the conditional internal function. */
2339 auto_vec<tree, 8> args;
2340 args.safe_grow (nops + 1, true);
2341 args[0] = mask;
2342 for (unsigned int i = 1; i < nops; ++i)
2343 args[i] = gimple_op (stmt, i);
2344 args[nops] = NULL_TREE;
2346 /* Look for uses of the result to see whether they are COND_EXPRs that can
2347 be folded into the conditional call. */
2348 imm_use_iterator imm_iter;
2349 gimple *use_stmt;
2350 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2352 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2353 if (new_else && value_available_p (stmt, ssa_names, new_else))
2355 if (!args[nops])
2356 args[nops] = new_else;
2357 if (operand_equal_p (new_else, args[nops], 0))
2359 /* We have:
2361 LHS = IFN_COND (MASK, ..., ELSE);
2362 X = MASK ? LHS : ELSE;
2364 which makes X equivalent to LHS. */
2365 tree use_lhs = gimple_assign_lhs (use_stmt);
2366 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2370 if (!args[nops])
2371 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2372 nops - 1, &args[1]);
2374 /* Create and insert the call. */
2375 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2376 gimple_call_set_lhs (new_stmt, lhs);
2377 gimple_call_set_nothrow (new_stmt, true);
2379 return new_stmt;
2382 /* Predicate each write to memory in LOOP.
2384 This function transforms control flow constructs containing memory
2385 writes of the form:
2387 | for (i = 0; i < N; i++)
2388 | if (cond)
2389 | A[i] = expr;
2391 into the following form that does not contain control flow:
2393 | for (i = 0; i < N; i++)
2394 | A[i] = cond ? expr : A[i];
2396 The original CFG looks like this:
2398 | bb_0
2399 | i = 0
2400 | end_bb_0
2402 | bb_1
2403 | if (i < N) goto bb_5 else goto bb_2
2404 | end_bb_1
2406 | bb_2
2407 | cond = some_computation;
2408 | if (cond) goto bb_3 else goto bb_4
2409 | end_bb_2
2411 | bb_3
2412 | A[i] = expr;
2413 | goto bb_4
2414 | end_bb_3
2416 | bb_4
2417 | goto bb_1
2418 | end_bb_4
2420 insert_gimplified_predicates inserts the computation of the COND
2421 expression at the beginning of the destination basic block:
2423 | bb_0
2424 | i = 0
2425 | end_bb_0
2427 | bb_1
2428 | if (i < N) goto bb_5 else goto bb_2
2429 | end_bb_1
2431 | bb_2
2432 | cond = some_computation;
2433 | if (cond) goto bb_3 else goto bb_4
2434 | end_bb_2
2436 | bb_3
2437 | cond = some_computation;
2438 | A[i] = expr;
2439 | goto bb_4
2440 | end_bb_3
2442 | bb_4
2443 | goto bb_1
2444 | end_bb_4
2446 predicate_statements is then predicating the memory write as follows:
2448 | bb_0
2449 | i = 0
2450 | end_bb_0
2452 | bb_1
2453 | if (i < N) goto bb_5 else goto bb_2
2454 | end_bb_1
2456 | bb_2
2457 | if (cond) goto bb_3 else goto bb_4
2458 | end_bb_2
2460 | bb_3
2461 | cond = some_computation;
2462 | A[i] = cond ? expr : A[i];
2463 | goto bb_4
2464 | end_bb_3
2466 | bb_4
2467 | goto bb_1
2468 | end_bb_4
2470 and finally combine_blocks removes the basic block boundaries making
2471 the loop vectorizable:
2473 | bb_0
2474 | i = 0
2475 | if (i < N) goto bb_5 else goto bb_1
2476 | end_bb_0
2478 | bb_1
2479 | cond = some_computation;
2480 | A[i] = cond ? expr : A[i];
2481 | if (i < N) goto bb_5 else goto bb_4
2482 | end_bb_1
2484 | bb_4
2485 | goto bb_1
2486 | end_bb_4
2489 static void
2490 predicate_statements (loop_p loop)
2492 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2493 auto_vec<int, 1> vect_sizes;
2494 auto_vec<tree, 1> vect_masks;
2495 hash_set<tree_ssa_name_hash> ssa_names;
2497 for (i = 1; i < orig_loop_num_nodes; i++)
2499 gimple_stmt_iterator gsi;
2500 basic_block bb = ifc_bbs[i];
2501 tree cond = bb_predicate (bb);
2502 bool swap;
2503 int index;
2505 if (is_true_predicate (cond))
2506 continue;
2508 swap = false;
2509 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2511 swap = true;
2512 cond = TREE_OPERAND (cond, 0);
2515 vect_sizes.truncate (0);
2516 vect_masks.truncate (0);
2518 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2520 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2521 tree lhs;
2522 if (!stmt)
2524 else if (is_false_predicate (cond)
2525 && gimple_vdef (stmt))
2527 unlink_stmt_vdef (stmt);
2528 gsi_remove (&gsi, true);
2529 release_defs (stmt);
2530 continue;
2532 else if (gimple_plf (stmt, GF_PLF_2))
2534 tree lhs = gimple_assign_lhs (stmt);
2535 tree mask;
2536 gimple *new_stmt;
2537 gimple_seq stmts = NULL;
2538 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2539 /* We checked before setting GF_PLF_2 that an equivalent
2540 integer mode exists. */
2541 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2542 if (!vect_sizes.is_empty ()
2543 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2544 /* Use created mask. */
2545 mask = vect_masks[index];
2546 else
2548 if (COMPARISON_CLASS_P (cond))
2549 mask = gimple_build (&stmts, TREE_CODE (cond),
2550 boolean_type_node,
2551 TREE_OPERAND (cond, 0),
2552 TREE_OPERAND (cond, 1));
2553 else
2554 mask = cond;
2556 if (swap)
2558 tree true_val
2559 = constant_boolean_node (true, TREE_TYPE (mask));
2560 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2561 TREE_TYPE (mask), mask, true_val);
2563 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2565 /* Save mask and its size for further use. */
2566 vect_sizes.safe_push (bitsize);
2567 vect_masks.safe_push (mask);
2569 if (gimple_assign_single_p (stmt))
2570 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2571 else
2572 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2574 gsi_replace (&gsi, new_stmt, true);
2576 else if (((lhs = gimple_assign_lhs (stmt)), true)
2577 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2578 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2579 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2580 && arith_code_with_undefined_signed_overflow
2581 (gimple_assign_rhs_code (stmt)))
2583 gsi_remove (&gsi, true);
2584 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2585 bool first = true;
2586 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2587 !gsi_end_p (gsi2);)
2589 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2590 gsi_remove (&gsi2, false);
2591 if (first)
2593 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2594 first = false;
2596 else
2597 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
2600 else if (gimple_vdef (stmt))
2602 tree lhs = gimple_assign_lhs (stmt);
2603 tree rhs = gimple_assign_rhs1 (stmt);
2604 tree type = TREE_TYPE (lhs);
2606 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2607 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2608 if (swap)
2609 std::swap (lhs, rhs);
2610 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2611 NULL_TREE, true, GSI_SAME_STMT);
2612 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2613 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2614 update_stmt (stmt);
2616 lhs = gimple_get_lhs (gsi_stmt (gsi));
2617 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2618 ssa_names.add (lhs);
2619 gsi_next (&gsi);
2621 ssa_names.empty ();
2625 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2626 other than the exit and latch of the LOOP. Also resets the
2627 GIMPLE_DEBUG information. */
2629 static void
2630 remove_conditions_and_labels (loop_p loop)
2632 gimple_stmt_iterator gsi;
2633 unsigned int i;
2635 for (i = 0; i < loop->num_nodes; i++)
2637 basic_block bb = ifc_bbs[i];
2639 if (bb_with_exit_edge_p (loop, bb)
2640 || bb == loop->latch)
2641 continue;
2643 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2644 switch (gimple_code (gsi_stmt (gsi)))
2646 case GIMPLE_COND:
2647 case GIMPLE_LABEL:
2648 gsi_remove (&gsi, true);
2649 break;
2651 case GIMPLE_DEBUG:
2652 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2653 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2655 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2656 update_stmt (gsi_stmt (gsi));
2658 gsi_next (&gsi);
2659 break;
2661 default:
2662 gsi_next (&gsi);
2667 /* Combine all the basic blocks from LOOP into one or two super basic
2668 blocks. Replace PHI nodes with conditional modify expressions. */
2670 static void
2671 combine_blocks (class loop *loop)
2673 basic_block bb, exit_bb, merge_target_bb;
2674 unsigned int orig_loop_num_nodes = loop->num_nodes;
2675 unsigned int i;
2676 edge e;
2677 edge_iterator ei;
2679 remove_conditions_and_labels (loop);
2680 insert_gimplified_predicates (loop);
2681 predicate_all_scalar_phis (loop);
2683 if (need_to_predicate || need_to_rewrite_undefined)
2684 predicate_statements (loop);
2686 /* Merge basic blocks. */
2687 exit_bb = NULL;
2688 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2689 for (i = 0; i < orig_loop_num_nodes; i++)
2691 bb = ifc_bbs[i];
2692 predicated[i] = !is_true_predicate (bb_predicate (bb));
2693 free_bb_predicate (bb);
2694 if (bb_with_exit_edge_p (loop, bb))
2696 gcc_assert (exit_bb == NULL);
2697 exit_bb = bb;
2700 gcc_assert (exit_bb != loop->latch);
2702 merge_target_bb = loop->header;
2704 /* Get at the virtual def valid for uses starting at the first block
2705 we merge into the header. Without a virtual PHI the loop has the
2706 same virtual use on all stmts. */
2707 gphi *vphi = get_virtual_phi (loop->header);
2708 tree last_vdef = NULL_TREE;
2709 if (vphi)
2711 last_vdef = gimple_phi_result (vphi);
2712 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2713 ! gsi_end_p (gsi); gsi_next (&gsi))
2714 if (gimple_vdef (gsi_stmt (gsi)))
2715 last_vdef = gimple_vdef (gsi_stmt (gsi));
2717 for (i = 1; i < orig_loop_num_nodes; i++)
2719 gimple_stmt_iterator gsi;
2720 gimple_stmt_iterator last;
2722 bb = ifc_bbs[i];
2724 if (bb == exit_bb || bb == loop->latch)
2725 continue;
2727 /* We release virtual PHIs late because we have to propagate them
2728 out using the current VUSE. The def might be the one used
2729 after the loop. */
2730 vphi = get_virtual_phi (bb);
2731 if (vphi)
2733 /* When there's just loads inside the loop a stray virtual
2734 PHI merging the uses can appear, update last_vdef from
2735 it. */
2736 if (!last_vdef)
2737 last_vdef = gimple_phi_arg_def (vphi, 0);
2738 imm_use_iterator iter;
2739 use_operand_p use_p;
2740 gimple *use_stmt;
2741 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2743 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2744 SET_USE (use_p, last_vdef);
2746 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2747 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2748 gsi = gsi_for_stmt (vphi);
2749 remove_phi_node (&gsi, true);
2752 /* Make stmts member of loop->header and clear range info from all stmts
2753 in BB which is now no longer executed conditional on a predicate we
2754 could have derived it from. */
2755 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2757 gimple *stmt = gsi_stmt (gsi);
2758 gimple_set_bb (stmt, merge_target_bb);
2759 /* Update virtual operands. */
2760 if (last_vdef)
2762 use_operand_p use_p = ssa_vuse_operand (stmt);
2763 if (use_p
2764 && USE_FROM_PTR (use_p) != last_vdef)
2765 SET_USE (use_p, last_vdef);
2766 if (gimple_vdef (stmt))
2767 last_vdef = gimple_vdef (stmt);
2769 else
2770 /* If this is the first load we arrive at update last_vdef
2771 so we handle stray PHIs correctly. */
2772 last_vdef = gimple_vuse (stmt);
2773 if (predicated[i])
2775 ssa_op_iter i;
2776 tree op;
2777 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2778 reset_flow_sensitive_info (op);
2782 /* Update stmt list. */
2783 last = gsi_last_bb (merge_target_bb);
2784 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2785 set_bb_seq (bb, NULL);
2788 /* Fixup virtual operands in the exit block. */
2789 if (exit_bb
2790 && exit_bb != loop->header)
2792 /* We release virtual PHIs late because we have to propagate them
2793 out using the current VUSE. The def might be the one used
2794 after the loop. */
2795 vphi = get_virtual_phi (exit_bb);
2796 if (vphi)
2798 /* When there's just loads inside the loop a stray virtual
2799 PHI merging the uses can appear, update last_vdef from
2800 it. */
2801 if (!last_vdef)
2802 last_vdef = gimple_phi_arg_def (vphi, 0);
2803 imm_use_iterator iter;
2804 use_operand_p use_p;
2805 gimple *use_stmt;
2806 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2808 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2809 SET_USE (use_p, last_vdef);
2811 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2812 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2813 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2814 remove_phi_node (&gsi, true);
2818 /* Now remove all the edges in the loop, except for those from the exit
2819 block and delete the blocks we elided. */
2820 for (i = 1; i < orig_loop_num_nodes; i++)
2822 bb = ifc_bbs[i];
2824 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2826 if (e->src == exit_bb)
2827 ei_next (&ei);
2828 else
2829 remove_edge (e);
2832 for (i = 1; i < orig_loop_num_nodes; i++)
2834 bb = ifc_bbs[i];
2836 if (bb == exit_bb || bb == loop->latch)
2837 continue;
2839 delete_basic_block (bb);
2842 /* Re-connect the exit block. */
2843 if (exit_bb != NULL)
2845 if (exit_bb != loop->header)
2847 /* Connect this node to loop header. */
2848 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2849 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2852 /* Redirect non-exit edges to loop->latch. */
2853 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2855 if (!loop_exit_edge_p (loop, e))
2856 redirect_edge_and_branch (e, loop->latch);
2858 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2860 else
2862 /* If the loop does not have an exit, reconnect header and latch. */
2863 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2864 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2867 /* If possible, merge loop header to the block with the exit edge.
2868 This reduces the number of basic blocks to two, to please the
2869 vectorizer that handles only loops with two nodes. */
2870 if (exit_bb
2871 && exit_bb != loop->header)
2873 if (can_merge_blocks_p (loop->header, exit_bb))
2874 merge_blocks (loop->header, exit_bb);
2877 free (ifc_bbs);
2878 ifc_bbs = NULL;
2879 free (predicated);
2882 /* Version LOOP before if-converting it; the original loop
2883 will be if-converted, the new copy of the loop will not,
2884 and the LOOP_VECTORIZED internal call will be guarding which
2885 loop to execute. The vectorizer pass will fold this
2886 internal call into either true or false.
2888 Note that this function intentionally invalidates profile. Both edges
2889 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2890 consistent after the condition is folded in the vectorizer. */
2892 static class loop *
2893 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2895 basic_block cond_bb;
2896 tree cond = make_ssa_name (boolean_type_node);
2897 class loop *new_loop;
2898 gimple *g;
2899 gimple_stmt_iterator gsi;
2900 unsigned int save_length = 0;
2902 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2903 build_int_cst (integer_type_node, loop->num),
2904 integer_zero_node);
2905 gimple_call_set_lhs (g, cond);
2907 void **saved_preds = NULL;
2908 if (any_complicated_phi || need_to_predicate)
2910 /* Save BB->aux around loop_version as that uses the same field. */
2911 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2912 saved_preds = XALLOCAVEC (void *, save_length);
2913 for (unsigned i = 0; i < save_length; i++)
2914 saved_preds[i] = ifc_bbs[i]->aux;
2917 initialize_original_copy_tables ();
2918 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2919 is re-merged in the vectorizer. */
2920 new_loop = loop_version (loop, cond, &cond_bb,
2921 profile_probability::always (),
2922 profile_probability::always (),
2923 profile_probability::always (),
2924 profile_probability::always (), true);
2925 free_original_copy_tables ();
2927 if (any_complicated_phi || need_to_predicate)
2928 for (unsigned i = 0; i < save_length; i++)
2929 ifc_bbs[i]->aux = saved_preds[i];
2931 if (new_loop == NULL)
2932 return NULL;
2934 new_loop->dont_vectorize = true;
2935 new_loop->force_vectorize = false;
2936 gsi = gsi_last_bb (cond_bb);
2937 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2938 if (preds)
2939 preds->safe_push (g);
2940 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2941 update_ssa (TODO_update_ssa_no_phi);
2942 return new_loop;
2945 /* Return true when LOOP satisfies the follow conditions that will
2946 allow it to be recognized by the vectorizer for outer-loop
2947 vectorization:
2948 - The loop is not the root node of the loop tree.
2949 - The loop has exactly one inner loop.
2950 - The loop has a single exit.
2951 - The loop header has a single successor, which is the inner
2952 loop header.
2953 - Each of the inner and outer loop latches have a single
2954 predecessor.
2955 - The loop exit block has a single predecessor, which is the
2956 inner loop's exit block. */
2958 static bool
2959 versionable_outer_loop_p (class loop *loop)
2961 if (!loop_outer (loop)
2962 || loop->dont_vectorize
2963 || !loop->inner
2964 || loop->inner->next
2965 || !single_exit (loop)
2966 || !single_succ_p (loop->header)
2967 || single_succ (loop->header) != loop->inner->header
2968 || !single_pred_p (loop->latch)
2969 || !single_pred_p (loop->inner->latch))
2970 return false;
2972 basic_block outer_exit = single_pred (loop->latch);
2973 basic_block inner_exit = single_pred (loop->inner->latch);
2975 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2976 return false;
2978 if (dump_file)
2979 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2981 return true;
2984 /* Performs splitting of critical edges. Skip splitting and return false
2985 if LOOP will not be converted because:
2987 - LOOP is not well formed.
2988 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2990 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
2992 static bool
2993 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
2995 basic_block *body;
2996 basic_block bb;
2997 unsigned int num = loop->num_nodes;
2998 unsigned int i;
2999 gimple *stmt;
3000 edge e;
3001 edge_iterator ei;
3002 auto_vec<edge> critical_edges;
3004 /* Loop is not well formed. */
3005 if (loop->inner)
3006 return false;
3008 body = get_loop_body (loop);
3009 for (i = 0; i < num; i++)
3011 bb = body[i];
3012 if (!aggressive_if_conv
3013 && phi_nodes (bb)
3014 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3016 if (dump_file && (dump_flags & TDF_DETAILS))
3017 fprintf (dump_file,
3018 "BB %d has complicated PHI with more than %u args.\n",
3019 bb->index, MAX_PHI_ARG_NUM);
3021 free (body);
3022 return false;
3024 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3025 continue;
3027 stmt = last_stmt (bb);
3028 /* Skip basic blocks not ending with conditional branch. */
3029 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3030 continue;
3032 FOR_EACH_EDGE (e, ei, bb->succs)
3033 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3034 critical_edges.safe_push (e);
3036 free (body);
3038 while (critical_edges.length () > 0)
3040 e = critical_edges.pop ();
3041 /* Don't split if bb can be predicated along non-critical edge. */
3042 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3043 split_edge (e);
3046 return true;
3049 /* Delete redundant statements produced by predication which prevents
3050 loop vectorization. */
3052 static void
3053 ifcvt_local_dce (class loop *loop)
3055 gimple *stmt;
3056 gimple *stmt1;
3057 gimple *phi;
3058 gimple_stmt_iterator gsi;
3059 auto_vec<gimple *> worklist;
3060 enum gimple_code code;
3061 use_operand_p use_p;
3062 imm_use_iterator imm_iter;
3064 /* The loop has a single BB only. */
3065 basic_block bb = loop->header;
3066 tree latch_vdef = NULL_TREE;
3068 worklist.create (64);
3069 /* Consider all phi as live statements. */
3070 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3072 phi = gsi_stmt (gsi);
3073 gimple_set_plf (phi, GF_PLF_2, true);
3074 worklist.safe_push (phi);
3075 if (virtual_operand_p (gimple_phi_result (phi)))
3076 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3078 /* Consider load/store statements, CALL and COND as live. */
3079 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3081 stmt = gsi_stmt (gsi);
3082 if (is_gimple_debug (stmt))
3084 gimple_set_plf (stmt, GF_PLF_2, true);
3085 continue;
3087 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3089 gimple_set_plf (stmt, GF_PLF_2, true);
3090 worklist.safe_push (stmt);
3091 continue;
3093 code = gimple_code (stmt);
3094 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3096 gimple_set_plf (stmt, GF_PLF_2, true);
3097 worklist.safe_push (stmt);
3098 continue;
3100 gimple_set_plf (stmt, GF_PLF_2, false);
3102 if (code == GIMPLE_ASSIGN)
3104 tree lhs = gimple_assign_lhs (stmt);
3105 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3107 stmt1 = USE_STMT (use_p);
3108 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3110 gimple_set_plf (stmt, GF_PLF_2, true);
3111 worklist.safe_push (stmt);
3112 break;
3117 /* Propagate liveness through arguments of live stmt. */
3118 while (worklist.length () > 0)
3120 ssa_op_iter iter;
3121 use_operand_p use_p;
3122 tree use;
3124 stmt = worklist.pop ();
3125 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3127 use = USE_FROM_PTR (use_p);
3128 if (TREE_CODE (use) != SSA_NAME)
3129 continue;
3130 stmt1 = SSA_NAME_DEF_STMT (use);
3131 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3132 continue;
3133 gimple_set_plf (stmt1, GF_PLF_2, true);
3134 worklist.safe_push (stmt1);
3137 /* Delete dead statements. */
3138 gsi = gsi_last_bb (bb);
3139 while (!gsi_end_p (gsi))
3141 gimple_stmt_iterator gsiprev = gsi;
3142 gsi_prev (&gsiprev);
3143 stmt = gsi_stmt (gsi);
3144 if (gimple_store_p (stmt) && gimple_vdef (stmt))
3146 tree lhs = gimple_get_lhs (stmt);
3147 ao_ref write;
3148 ao_ref_init (&write, lhs);
3150 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3151 == DSE_STORE_DEAD)
3152 delete_dead_or_redundant_assignment (&gsi, "dead");
3153 gsi = gsiprev;
3154 continue;
3157 if (gimple_plf (stmt, GF_PLF_2))
3159 gsi = gsiprev;
3160 continue;
3162 if (dump_file && (dump_flags & TDF_DETAILS))
3164 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3165 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3167 gsi_remove (&gsi, true);
3168 release_defs (stmt);
3169 gsi = gsiprev;
3173 /* Return true if VALUE is already available on edge PE. */
3175 static bool
3176 ifcvt_available_on_edge_p (edge pe, tree value)
3178 if (is_gimple_min_invariant (value))
3179 return true;
3181 if (TREE_CODE (value) == SSA_NAME)
3183 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3184 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3185 return true;
3188 return false;
3191 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3192 edge PE. */
3194 static bool
3195 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3197 if (auto *call = dyn_cast<gcall *> (stmt))
3199 if (gimple_call_internal_p (call)
3200 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3201 return false;
3203 else if (auto *assign = dyn_cast<gassign *> (stmt))
3205 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3206 return false;
3208 else
3209 return false;
3211 if (gimple_has_side_effects (stmt)
3212 || gimple_could_trap_p (stmt)
3213 || stmt_could_throw_p (cfun, stmt)
3214 || gimple_vdef (stmt)
3215 || gimple_vuse (stmt))
3216 return false;
3218 int num_args = gimple_num_args (stmt);
3219 if (pe != loop_preheader_edge (loop))
3221 for (int i = 0; i < num_args; ++i)
3222 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3223 return false;
3225 else
3227 for (int i = 0; i < num_args; ++i)
3228 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3229 return false;
3232 return true;
3235 /* Hoist invariant statements from LOOP to edge PE. */
3237 static void
3238 ifcvt_hoist_invariants (class loop *loop, edge pe)
3240 gimple_stmt_iterator hoist_gsi = {};
3241 unsigned int num_blocks = loop->num_nodes;
3242 basic_block *body = get_loop_body (loop);
3243 for (unsigned int i = 0; i < num_blocks; ++i)
3244 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3246 gimple *stmt = gsi_stmt (gsi);
3247 if (ifcvt_can_hoist (loop, pe, stmt))
3249 /* Once we've hoisted one statement, insert other statements
3250 after it. */
3251 gsi_remove (&gsi, false);
3252 if (hoist_gsi.ptr)
3253 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3254 else
3256 gsi_insert_on_edge_immediate (pe, stmt);
3257 hoist_gsi = gsi_for_stmt (stmt);
3259 continue;
3261 gsi_next (&gsi);
3263 free (body);
3266 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3267 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3268 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3269 if not NULL, will hold the tree representing the base struct of this
3270 bitfield. */
3272 static tree
3273 get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3274 tree *struct_expr)
3276 tree comp_ref = write ? gimple_assign_lhs (stmt)
3277 : gimple_assign_rhs1 (stmt);
3279 tree field_decl = TREE_OPERAND (comp_ref, 1);
3280 tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3282 /* Bail out if the representative is BLKmode as we will not be able to
3283 vectorize this. */
3284 if (TYPE_MODE (TREE_TYPE (rep_decl)) == E_BLKmode)
3285 return NULL_TREE;
3287 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3288 precision. */
3289 unsigned HOST_WIDE_INT bf_prec
3290 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3291 if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3292 return NULL_TREE;
3294 if (struct_expr)
3295 *struct_expr = TREE_OPERAND (comp_ref, 0);
3297 if (bitpos)
3299 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3300 where our bitfield starts in relation to the container REP_DECL. The
3301 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3302 us how many bytes from the start of the structure there are until the
3303 start of the group of bitfield members the FIELD_DECL belongs to,
3304 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3305 position our actual bitfield member starts. For the container
3306 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3307 us the distance between the start of the structure and the start of
3308 the container, though the first is in bytes and the later other in
3309 bits. With this in mind we calculate the bit position of our new
3310 BITFIELD_REF by subtracting the number of bits between the start of
3311 the structure and the container from the number of bits from the start
3312 of the structure and the actual bitfield member. */
3313 tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3314 DECL_FIELD_OFFSET (field_decl),
3315 build_int_cst (bitsizetype, BITS_PER_UNIT));
3316 bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3317 DECL_FIELD_BIT_OFFSET (field_decl));
3318 tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3319 DECL_FIELD_OFFSET (rep_decl),
3320 build_int_cst (bitsizetype, BITS_PER_UNIT));
3321 rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3322 DECL_FIELD_BIT_OFFSET (rep_decl));
3324 *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3327 return rep_decl;
3331 /* Lowers the bitfield described by DATA.
3332 For a write like:
3334 struct.bf = _1;
3336 lower to:
3338 __ifc_1 = struct.<representative>;
3339 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3340 struct.<representative> = __ifc_2;
3342 For a read:
3344 _1 = struct.bf;
3346 lower to:
3348 __ifc_1 = struct.<representative>;
3349 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3351 where representative is a legal load that contains the bitfield value,
3352 bitsize is the size of the bitfield and bitpos the offset to the start of
3353 the bitfield within the representative. */
3355 static void
3356 lower_bitfield (gassign *stmt, bool write)
3358 tree struct_expr;
3359 tree bitpos;
3360 tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
3361 tree rep_type = TREE_TYPE (rep_decl);
3362 tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
3364 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3365 if (dump_file && (dump_flags & TDF_DETAILS))
3367 fprintf (dump_file, "Lowering:\n");
3368 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3369 fprintf (dump_file, "to:\n");
3372 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3373 defining SSA_NAME. */
3374 tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
3375 NULL_TREE);
3376 tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
3378 if (dump_file && (dump_flags & TDF_DETAILS))
3379 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3381 if (write)
3383 new_val = ifc_temp_var (rep_type,
3384 build3 (BIT_INSERT_EXPR, rep_type, new_val,
3385 unshare_expr (gimple_assign_rhs1 (stmt)),
3386 bitpos), &gsi);
3388 if (dump_file && (dump_flags & TDF_DETAILS))
3389 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3391 gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
3392 new_val);
3393 gimple_move_vops (new_stmt, stmt);
3394 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3396 if (dump_file && (dump_flags & TDF_DETAILS))
3397 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3399 else
3401 tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
3402 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
3403 bitpos);
3404 new_val = ifc_temp_var (bf_type, bfr, &gsi);
3406 gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
3407 new_val);
3408 gimple_move_vops (new_stmt, stmt);
3409 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3411 if (dump_file && (dump_flags & TDF_DETAILS))
3412 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3415 gsi_remove (&gsi, true);
3418 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3419 with data structures representing these bitfields. */
3421 static bool
3422 bitfields_to_lower_p (class loop *loop,
3423 vec <gassign *> &reads_to_lower,
3424 vec <gassign *> &writes_to_lower)
3426 gimple_stmt_iterator gsi;
3428 if (dump_file && (dump_flags & TDF_DETAILS))
3430 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3433 for (unsigned i = 0; i < loop->num_nodes; ++i)
3435 basic_block bb = ifc_bbs[i];
3436 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3438 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3439 if (!stmt)
3440 continue;
3442 tree op = gimple_assign_lhs (stmt);
3443 bool write = TREE_CODE (op) == COMPONENT_REF;
3445 if (!write)
3446 op = gimple_assign_rhs1 (stmt);
3448 if (TREE_CODE (op) != COMPONENT_REF)
3449 continue;
3451 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3453 if (dump_file && (dump_flags & TDF_DETAILS))
3454 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3456 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3458 if (dump_file && (dump_flags & TDF_DETAILS))
3459 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3460 " field type is not Integral.\n");
3461 return false;
3464 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3466 if (dump_file && (dump_flags & TDF_DETAILS))
3467 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3468 " representative is BLKmode.\n");
3469 return false;
3472 if (dump_file && (dump_flags & TDF_DETAILS))
3473 fprintf (dump_file, "\tBitfield OK to lower.\n");
3474 if (write)
3475 writes_to_lower.safe_push (stmt);
3476 else
3477 reads_to_lower.safe_push (stmt);
3481 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
3485 /* If-convert LOOP when it is legal. For the moment this pass has no
3486 profitability analysis. Returns non-zero todo flags when something
3487 changed. */
3489 unsigned int
3490 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3492 unsigned int todo = 0;
3493 bool aggressive_if_conv;
3494 class loop *rloop;
3495 auto_vec <gassign *, 4> reads_to_lower;
3496 auto_vec <gassign *, 4> writes_to_lower;
3497 bitmap exit_bbs;
3498 edge pe;
3499 vec<data_reference_p> refs;
3501 again:
3502 rloop = NULL;
3503 ifc_bbs = NULL;
3504 need_to_lower_bitfields = false;
3505 need_to_ifcvt = false;
3506 need_to_predicate = false;
3507 need_to_rewrite_undefined = false;
3508 any_complicated_phi = false;
3509 refs.create (5);
3511 /* Apply more aggressive if-conversion when loop or its outer loop were
3512 marked with simd pragma. When that's the case, we try to if-convert
3513 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3514 aggressive_if_conv = loop->force_vectorize;
3515 if (!aggressive_if_conv)
3517 class loop *outer_loop = loop_outer (loop);
3518 if (outer_loop && outer_loop->force_vectorize)
3519 aggressive_if_conv = true;
3522 if (!single_exit (loop))
3523 goto cleanup;
3525 /* If there are more than two BBs in the loop then there is at least one if
3526 to convert. */
3527 if (loop->num_nodes > 2
3528 && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
3529 goto cleanup;
3531 ifc_bbs = get_loop_body_in_if_conv_order (loop);
3532 if (!ifc_bbs)
3534 if (dump_file && (dump_flags & TDF_DETAILS))
3535 fprintf (dump_file, "Irreducible loop\n");
3536 goto cleanup;
3539 if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
3540 goto cleanup;
3542 if (loop->num_nodes > 2)
3544 need_to_ifcvt = true;
3546 if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
3547 goto cleanup;
3549 if ((need_to_predicate || any_complicated_phi)
3550 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3551 || loop->dont_vectorize))
3552 goto cleanup;
3555 if ((flag_tree_loop_vectorize || loop->force_vectorize)
3556 && !loop->dont_vectorize)
3557 need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
3558 writes_to_lower);
3560 if (!need_to_ifcvt && !need_to_lower_bitfields)
3561 goto cleanup;
3563 /* The edge to insert invariant stmts on. */
3564 pe = loop_preheader_edge (loop);
3566 /* Since we have no cost model, always version loops unless the user
3567 specified -ftree-loop-if-convert or unless versioning is required.
3568 Either version this loop, or if the pattern is right for outer-loop
3569 vectorization, version the outer loop. In the latter case we will
3570 still if-convert the original inner loop. */
3571 if (need_to_lower_bitfields
3572 || need_to_predicate
3573 || any_complicated_phi
3574 || flag_tree_loop_if_convert != 1)
3576 class loop *vloop
3577 = (versionable_outer_loop_p (loop_outer (loop))
3578 ? loop_outer (loop) : loop);
3579 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3580 if (nloop == NULL)
3581 goto cleanup;
3582 if (vloop != loop)
3584 /* If versionable_outer_loop_p decided to version the
3585 outer loop, version also the inner loop of the non-vectorized
3586 loop copy. So we transform:
3587 loop1
3588 loop2
3589 into:
3590 if (LOOP_VECTORIZED (1, 3))
3592 loop1
3593 loop2
3595 else
3596 loop3 (copy of loop1)
3597 if (LOOP_VECTORIZED (4, 5))
3598 loop4 (copy of loop2)
3599 else
3600 loop5 (copy of loop4) */
3601 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3602 rloop = nloop->inner;
3604 else
3605 /* If we versioned loop then make sure to insert invariant
3606 stmts before the .LOOP_VECTORIZED check since the vectorizer
3607 will re-use that for things like runtime alias versioning
3608 whose condition can end up using those invariants. */
3609 pe = single_pred_edge (gimple_bb (preds->last ()));
3612 if (need_to_lower_bitfields)
3614 if (dump_file && (dump_flags & TDF_DETAILS))
3616 fprintf (dump_file, "-------------------------\n");
3617 fprintf (dump_file, "Start lowering bitfields\n");
3619 while (!reads_to_lower.is_empty ())
3620 lower_bitfield (reads_to_lower.pop (), false);
3621 while (!writes_to_lower.is_empty ())
3622 lower_bitfield (writes_to_lower.pop (), true);
3624 if (dump_file && (dump_flags & TDF_DETAILS))
3626 fprintf (dump_file, "Done lowering bitfields\n");
3627 fprintf (dump_file, "-------------------------\n");
3630 if (need_to_ifcvt)
3632 /* Now all statements are if-convertible. Combine all the basic
3633 blocks into one huge basic block doing the if-conversion
3634 on-the-fly. */
3635 combine_blocks (loop);
3638 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3639 and stores are involved. CSE only the loop body, not the entry
3640 PHIs, those are to be kept in sync with the non-if-converted copy.
3641 ??? We'll still keep dead stores though. */
3642 exit_bbs = BITMAP_ALLOC (NULL);
3643 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3644 bitmap_set_bit (exit_bbs, loop->latch->index);
3646 std::pair <tree, tree> *name_pair;
3647 unsigned ssa_names_idx;
3648 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3649 replace_uses_by (name_pair->first, name_pair->second);
3650 redundant_ssa_names.release ();
3652 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3654 /* Delete dead predicate computations. */
3655 ifcvt_local_dce (loop);
3656 BITMAP_FREE (exit_bbs);
3658 ifcvt_hoist_invariants (loop, pe);
3660 todo |= TODO_cleanup_cfg;
3662 cleanup:
3663 data_reference_p dr;
3664 unsigned int i;
3665 for (i = 0; refs.iterate (i, &dr); i++)
3666 free (dr->aux);
3668 refs.truncate (0);
3670 if (ifc_bbs)
3672 unsigned int i;
3674 for (i = 0; i < loop->num_nodes; i++)
3675 free_bb_predicate (ifc_bbs[i]);
3677 free (ifc_bbs);
3678 ifc_bbs = NULL;
3680 if (rloop != NULL)
3682 loop = rloop;
3683 reads_to_lower.truncate (0);
3684 writes_to_lower.truncate (0);
3685 goto again;
3688 return todo;
3691 /* Tree if-conversion pass management. */
3693 namespace {
3695 const pass_data pass_data_if_conversion =
3697 GIMPLE_PASS, /* type */
3698 "ifcvt", /* name */
3699 OPTGROUP_NONE, /* optinfo_flags */
3700 TV_TREE_LOOP_IFCVT, /* tv_id */
3701 ( PROP_cfg | PROP_ssa ), /* properties_required */
3702 0, /* properties_provided */
3703 0, /* properties_destroyed */
3704 0, /* todo_flags_start */
3705 0, /* todo_flags_finish */
3708 class pass_if_conversion : public gimple_opt_pass
3710 public:
3711 pass_if_conversion (gcc::context *ctxt)
3712 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3715 /* opt_pass methods: */
3716 bool gate (function *) final override;
3717 unsigned int execute (function *) final override;
3719 }; // class pass_if_conversion
3721 bool
3722 pass_if_conversion::gate (function *fun)
3724 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3725 && flag_tree_loop_if_convert != 0)
3726 || flag_tree_loop_if_convert == 1);
3729 unsigned int
3730 pass_if_conversion::execute (function *fun)
3732 unsigned todo = 0;
3734 if (number_of_loops (fun) <= 1)
3735 return 0;
3737 auto_vec<gimple *> preds;
3738 for (auto loop : loops_list (cfun, 0))
3739 if (flag_tree_loop_if_convert == 1
3740 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3741 && !loop->dont_vectorize))
3742 todo |= tree_if_conversion (loop, &preds);
3744 if (todo)
3746 free_numbers_of_iterations_estimates (fun);
3747 scev_reset ();
3750 if (flag_checking)
3752 basic_block bb;
3753 FOR_EACH_BB_FN (bb, fun)
3754 gcc_assert (!bb->aux);
3757 /* Perform IL update now, it might elide some loops. */
3758 if (todo & TODO_cleanup_cfg)
3760 cleanup_tree_cfg ();
3761 if (need_ssa_update_p (fun))
3762 todo |= TODO_update_ssa;
3764 if (todo & TODO_update_ssa_any)
3765 update_ssa (todo & TODO_update_ssa_any);
3767 /* If if-conversion elided the loop fall back to the original one. */
3768 for (unsigned i = 0; i < preds.length (); ++i)
3770 gimple *g = preds[i];
3771 if (!gimple_bb (g))
3772 continue;
3773 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3774 unsigned orig_loop = tree_to_uhwi (gimple_call_arg (g, 1));
3775 if (!get_loop (fun, ifcvt_loop) || !get_loop (fun, orig_loop))
3777 if (dump_file)
3778 fprintf (dump_file, "If-converted loop vanished\n");
3779 fold_loop_internal_call (g, boolean_false_node);
3783 return 0;
3786 } // anon namespace
3788 gimple_opt_pass *
3789 make_pass_if_conversion (gcc::context *ctxt)
3791 return new pass_if_conversion (ctxt);