syscall, runtime: always call XSI strerror_r
[official-gcc.git] / gcc / tree-if-conv.cc
blob64b20b4a9e11fe53d7a4bd188c514fdf759b3f3b
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "expr.h"
95 #include "optabs-query.h"
96 #include "gimple-pretty-print.h"
97 #include "alias.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
107 #include "cfgloop.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
114 #include "dbgcnt.h"
115 #include "tree-hash-traits.h"
116 #include "varasm.h"
117 #include "builtins.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
125 #include "tree-eh.h"
127 /* For lang_hooks.types.type_for_mode. */
128 #include "langhooks.h"
/* Only handle PHIs with no more arguments than this, unless the simd
   pragma asks us to handle more.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  A complicated PHI has more than two arguments and
   can't be degenerated to a two argument PHI.  See the comment before
   phi_convertible_by_degenerating_args for details.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any if-converting to be done.  */
static bool need_to_ifcvt;
158 /* Hash for struct innermost_loop_behavior. It depends on the user to
159 free the memory. */
161 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
163 static inline hashval_t hash (const value_type &);
164 static inline bool equal (const value_type &,
165 const compare_type &);
168 inline hashval_t
169 innermost_loop_behavior_hash::hash (const value_type &e)
171 hashval_t hash;
173 hash = iterative_hash_expr (e->base_address, 0);
174 hash = iterative_hash_expr (e->offset, hash);
175 hash = iterative_hash_expr (e->init, hash);
176 return iterative_hash_expr (e->step, hash);
179 inline bool
180 innermost_loop_behavior_hash::equal (const value_type &e1,
181 const compare_type &e2)
183 if ((e1->base_address && !e2->base_address)
184 || (!e1->base_address && e2->base_address)
185 || (!e1->offset && e2->offset)
186 || (e1->offset && !e2->offset)
187 || (!e1->init && e2->init)
188 || (e1->init && !e2->init)
189 || (!e1->step && e2->step)
190 || (e1->step && !e2->step))
191 return false;
193 if (e1->base_address && e2->base_address
194 && !operand_equal_p (e1->base_address, e2->base_address, 0))
195 return false;
196 if (e1->offset && e2->offset
197 && !operand_equal_p (e1->offset, e2->offset, 0))
198 return false;
199 if (e1->init && e2->init
200 && !operand_equal_p (e1->init, e2->init, 0))
201 return false;
202 if (e1->step && e2->step
203 && !operand_equal_p (e1->step, e2->step, 0))
204 return false;
206 return true;
209 /* List of basic blocks in if-conversion-suitable order. */
210 static basic_block *ifc_bbs;
212 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
213 static hash_map<innermost_loop_behavior_hash,
214 data_reference_p> *innermost_DR_map;
216 /* Hash table to store <base reference, DR> pairs. */
217 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
219 /* List of redundant SSA names: the first should be replaced by the second. */
220 static vec< std::pair<tree, tree> > redundant_ssa_names;
222 /* Structure used to predicate basic blocks. This is attached to the
223 ->aux field of the BBs in the loop to be if-converted. */
224 struct bb_predicate {
226 /* The condition under which this basic block is executed. */
227 tree predicate;
229 /* PREDICATE is gimplified, and the sequence of statements is
230 recorded here, in order to avoid the duplication of computations
231 that occur in previous conditions. See PR44483. */
232 gimple_seq predicate_gimplified_stmts;
235 /* Returns true when the basic block BB has a predicate. */
237 static inline bool
238 bb_has_predicate (basic_block bb)
240 return bb->aux != NULL;
243 /* Returns the gimplified predicate for basic block BB. */
245 static inline tree
246 bb_predicate (basic_block bb)
248 return ((struct bb_predicate *) bb->aux)->predicate;
251 /* Sets the gimplified predicate COND for basic block BB. */
253 static inline void
254 set_bb_predicate (basic_block bb, tree cond)
256 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
257 && is_gimple_val (TREE_OPERAND (cond, 0)))
258 || is_gimple_val (cond));
259 ((struct bb_predicate *) bb->aux)->predicate = cond;
262 /* Returns the sequence of statements of the gimplification of the
263 predicate for basic block BB. */
265 static inline gimple_seq
266 bb_predicate_gimplified_stmts (basic_block bb)
268 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
271 /* Sets the sequence of statements STMTS of the gimplification of the
272 predicate for basic block BB. */
274 static inline void
275 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
277 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
280 /* Adds the sequence of statements STMTS to the sequence of statements
281 of the predicate for basic block BB. */
283 static inline void
284 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
286 /* We might have updated some stmts in STMTS via force_gimple_operand
287 calling fold_stmt and that producing multiple stmts. Delink immediate
288 uses so update_ssa after loop versioning doesn't get confused for
289 the not yet inserted predicates.
290 ??? This should go away once we reliably avoid updating stmts
291 not in any BB. */
292 for (gimple_stmt_iterator gsi = gsi_start (stmts);
293 !gsi_end_p (gsi); gsi_next (&gsi))
295 gimple *stmt = gsi_stmt (gsi);
296 delink_stmt_imm_use (stmt);
297 gimple_set_modified (stmt, true);
299 gimple_seq_add_seq_without_update
300 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
303 /* Initializes to TRUE the predicate of basic block BB. */
305 static inline void
306 init_bb_predicate (basic_block bb)
308 bb->aux = XNEW (struct bb_predicate);
309 set_bb_predicate_gimplified_stmts (bb, NULL);
310 set_bb_predicate (bb, boolean_true_node);
313 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
315 static inline void
316 release_bb_predicate (basic_block bb)
318 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
319 if (stmts)
321 /* Ensure that these stmts haven't yet been added to a bb. */
322 if (flag_checking)
323 for (gimple_stmt_iterator i = gsi_start (stmts);
324 !gsi_end_p (i); gsi_next (&i))
325 gcc_assert (! gimple_bb (gsi_stmt (i)));
327 /* Discard them. */
328 gimple_seq_discard (stmts);
329 set_bb_predicate_gimplified_stmts (bb, NULL);
333 /* Free the predicate of basic block BB. */
335 static inline void
336 free_bb_predicate (basic_block bb)
338 if (!bb_has_predicate (bb))
339 return;
341 release_bb_predicate (bb);
342 free (bb->aux);
343 bb->aux = NULL;
346 /* Reinitialize predicate of BB with the true predicate. */
348 static inline void
349 reset_bb_predicate (basic_block bb)
351 if (!bb_has_predicate (bb))
352 init_bb_predicate (bb);
353 else
355 release_bb_predicate (bb);
356 set_bb_predicate (bb, boolean_true_node);
360 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
361 the expression EXPR. Inserts the statement created for this
362 computation before GSI and leaves the iterator GSI at the same
363 statement. */
365 static tree
366 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
368 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
369 gimple *stmt = gimple_build_assign (new_name, expr);
370 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
371 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
372 return new_name;
375 /* Return true when COND is a false predicate. */
377 static inline bool
378 is_false_predicate (tree cond)
380 return (cond != NULL_TREE
381 && (cond == boolean_false_node
382 || integer_zerop (cond)));
385 /* Return true when COND is a true predicate. */
387 static inline bool
388 is_true_predicate (tree cond)
390 return (cond == NULL_TREE
391 || cond == boolean_true_node
392 || integer_onep (cond));
395 /* Returns true when BB has a predicate that is not trivial: true or
396 NULL_TREE. */
398 static inline bool
399 is_predicated (basic_block bb)
401 return !is_true_predicate (bb_predicate (bb));
404 /* Parses the predicate COND and returns its comparison code and
405 operands OP0 and OP1. */
407 static enum tree_code
408 parse_predicate (tree cond, tree *op0, tree *op1)
410 gimple *s;
412 if (TREE_CODE (cond) == SSA_NAME
413 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
415 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
417 *op0 = gimple_assign_rhs1 (s);
418 *op1 = gimple_assign_rhs2 (s);
419 return gimple_assign_rhs_code (s);
422 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
424 tree op = gimple_assign_rhs1 (s);
425 tree type = TREE_TYPE (op);
426 enum tree_code code = parse_predicate (op, op0, op1);
428 return code == ERROR_MARK ? ERROR_MARK
429 : invert_tree_comparison (code, HONOR_NANS (type));
432 return ERROR_MARK;
435 if (COMPARISON_CLASS_P (cond))
437 *op0 = TREE_OPERAND (cond, 0);
438 *op1 = TREE_OPERAND (cond, 1);
439 return TREE_CODE (cond);
442 return ERROR_MARK;
445 /* Returns the fold of predicate C1 OR C2 at location LOC. */
447 static tree
448 fold_or_predicates (location_t loc, tree c1, tree c2)
450 tree op1a, op1b, op2a, op2b;
451 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
452 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
454 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
456 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
457 code2, op2a, op2b);
458 if (t)
459 return t;
462 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
465 /* Returns either a COND_EXPR or the folded expression if the folded
466 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
467 a constant or a SSA_NAME. */
469 static tree
470 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
472 /* If COND is comparison r != 0 and r has boolean type, convert COND
473 to SSA_NAME to accept by vect bool pattern. */
474 if (TREE_CODE (cond) == NE_EXPR)
476 tree op0 = TREE_OPERAND (cond, 0);
477 tree op1 = TREE_OPERAND (cond, 1);
478 if (TREE_CODE (op0) == SSA_NAME
479 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
480 && (integer_zerop (op1)))
481 cond = op0;
484 gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
485 type, cond, rhs, lhs);
486 if (cexpr.resimplify (NULL, follow_all_ssa_edges))
488 if (gimple_simplified_result_is_gimple_val (&cexpr))
489 return cexpr.ops[0];
490 else if (cexpr.code == ABS_EXPR)
491 return build1 (ABS_EXPR, type, cexpr.ops[0]);
492 else if (cexpr.code == MIN_EXPR
493 || cexpr.code == MAX_EXPR)
494 return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
497 return build3 (COND_EXPR, type, cond, rhs, lhs);
500 /* Add condition NC to the predicate list of basic block BB. LOOP is
501 the loop to be if-converted. Use predicate of cd-equivalent block
502 for join bb if it exists: we call basic blocks bb1 and bb2
503 cd-equivalent if they are executed under the same condition. */
505 static inline void
506 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
508 tree bc, *tp;
509 basic_block dom_bb;
511 if (is_true_predicate (nc))
512 return;
514 /* If dominance tells us this basic block is always executed,
515 don't record any predicates for it. */
516 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
517 return;
519 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
520 /* We use notion of cd equivalence to get simpler predicate for
521 join block, e.g. if join block has 2 predecessors with predicates
522 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
523 p1 & p2 | p1 & !p2. */
524 if (dom_bb != loop->header
525 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
527 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
528 bc = bb_predicate (dom_bb);
529 if (!is_true_predicate (bc))
530 set_bb_predicate (bb, bc);
531 else
532 gcc_assert (is_true_predicate (bb_predicate (bb)));
533 if (dump_file && (dump_flags & TDF_DETAILS))
534 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
535 dom_bb->index, bb->index);
536 return;
539 if (!is_predicated (bb))
540 bc = nc;
541 else
543 bc = bb_predicate (bb);
544 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
545 if (is_true_predicate (bc))
547 reset_bb_predicate (bb);
548 return;
552 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
553 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
554 tp = &TREE_OPERAND (bc, 0);
555 else
556 tp = &bc;
557 if (!is_gimple_val (*tp))
559 gimple_seq stmts;
560 *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
561 add_bb_predicate_gimplified_stmts (bb, stmts);
563 set_bb_predicate (bb, bc);
566 /* Add the condition COND to the previous condition PREV_COND, and add
567 this to the predicate list of the destination of edge E. LOOP is
568 the loop to be if-converted. */
570 static void
571 add_to_dst_predicate_list (class loop *loop, edge e,
572 tree prev_cond, tree cond)
574 if (!flow_bb_inside_loop_p (loop, e->dest))
575 return;
577 if (!is_true_predicate (prev_cond))
578 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
579 prev_cond, cond);
581 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
582 add_to_predicate_list (loop, e->dest, cond);
585 /* Return true if one of the successor edges of BB exits LOOP. */
587 static bool
588 bb_with_exit_edge_p (class loop *loop, basic_block bb)
590 edge e;
591 edge_iterator ei;
593 FOR_EACH_EDGE (e, ei, bb->succs)
594 if (loop_exit_edge_p (loop, e))
595 return true;
597 return false;
600 /* Given PHI which has more than two arguments, this function checks if
601 it's if-convertible by degenerating its arguments. Specifically, if
602 below two conditions are satisfied:
604 1) Number of PHI arguments with different values equals to 2 and one
605 argument has the only occurrence.
606 2) The edge corresponding to the unique argument isn't critical edge.
608 Such PHI can be handled as PHIs have only two arguments. For example,
609 below PHI:
611 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
613 can be transformed into:
615 res = (predicate of e3) ? A_2 : A_1;
617 Return TRUE if it is the case, FALSE otherwise. */
619 static bool
620 phi_convertible_by_degenerating_args (gphi *phi)
622 edge e;
623 tree arg, t1 = NULL, t2 = NULL;
624 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
625 unsigned int num_args = gimple_phi_num_args (phi);
627 gcc_assert (num_args > 2);
629 for (i = 0; i < num_args; i++)
631 arg = gimple_phi_arg_def (phi, i);
632 if (t1 == NULL || operand_equal_p (t1, arg, 0))
634 n1++;
635 i1 = i;
636 t1 = arg;
638 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
640 n2++;
641 i2 = i;
642 t2 = arg;
644 else
645 return false;
648 if (n1 != 1 && n2 != 1)
649 return false;
651 /* Check if the edge corresponding to the unique arg is critical. */
652 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
653 if (EDGE_COUNT (e->src->succs) > 1)
654 return false;
656 return true;
659 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
660 and it belongs to basic block BB. Note at this point, it is sure
661 that PHI is if-convertible. This function updates global variable
662 ANY_COMPLICATED_PHI if PHI is complicated. */
664 static bool
665 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
667 if (dump_file && (dump_flags & TDF_DETAILS))
669 fprintf (dump_file, "-------------------------\n");
670 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
673 if (bb != loop->header
674 && gimple_phi_num_args (phi) > 2
675 && !phi_convertible_by_degenerating_args (phi))
676 any_complicated_phi = true;
678 return true;
681 /* Records the status of a data reference. This struct is attached to
682 each DR->aux field. */
684 struct ifc_dr {
685 bool rw_unconditionally;
686 bool w_unconditionally;
687 bool written_at_least_once;
689 tree rw_predicate;
690 tree w_predicate;
691 tree base_w_predicate;
694 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
695 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
696 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
697 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
699 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
700 HASH tables. While storing them in HASH table, it checks if the
701 reference is unconditionally read or written and stores that as a flag
702 information. For base reference it checks if it is written atlest once
703 unconditionally and stores it as flag information along with DR.
704 In other words for every data reference A in STMT there exist other
705 accesses to a data reference with the same base with predicates that
706 add up (OR-up) to the true predicate: this ensures that the data
707 reference A is touched (read or written) on every iteration of the
708 if-converted loop. */
709 static void
710 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
713 data_reference_p *master_dr, *base_master_dr;
714 tree base_ref = DR_BASE_OBJECT (a);
715 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
716 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
717 bool exist1, exist2;
719 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
720 if (!exist1)
721 *master_dr = a;
723 if (DR_IS_WRITE (a))
725 IFC_DR (*master_dr)->w_predicate
726 = fold_or_predicates (UNKNOWN_LOCATION, ca,
727 IFC_DR (*master_dr)->w_predicate);
728 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
729 DR_W_UNCONDITIONALLY (*master_dr) = true;
731 IFC_DR (*master_dr)->rw_predicate
732 = fold_or_predicates (UNKNOWN_LOCATION, ca,
733 IFC_DR (*master_dr)->rw_predicate);
734 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
735 DR_RW_UNCONDITIONALLY (*master_dr) = true;
737 if (DR_IS_WRITE (a))
739 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
740 if (!exist2)
741 *base_master_dr = a;
742 IFC_DR (*base_master_dr)->base_w_predicate
743 = fold_or_predicates (UNKNOWN_LOCATION, ca,
744 IFC_DR (*base_master_dr)->base_w_predicate);
745 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
746 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
750 /* Return TRUE if can prove the index IDX of an array reference REF is
751 within array bound. Return false otherwise. */
753 static bool
754 idx_within_array_bound (tree ref, tree *idx, void *dta)
756 wi::overflow_type overflow;
757 widest_int niter, valid_niter, delta, wi_step;
758 tree ev, init, step;
759 tree low, high;
760 class loop *loop = (class loop*) dta;
762 /* Only support within-bound access for array references. */
763 if (TREE_CODE (ref) != ARRAY_REF)
764 return false;
766 /* For arrays that might have flexible sizes, it is not guaranteed that they
767 do not extend over their declared size. */
768 if (array_ref_flexible_size_p (ref))
769 return false;
771 ev = analyze_scalar_evolution (loop, *idx);
772 ev = instantiate_parameters (loop, ev);
773 init = initial_condition (ev);
774 step = evolution_part_in_loop_num (ev, loop->num);
776 if (!init || TREE_CODE (init) != INTEGER_CST
777 || (step && TREE_CODE (step) != INTEGER_CST))
778 return false;
780 low = array_ref_low_bound (ref);
781 high = array_ref_up_bound (ref);
783 /* The case of nonconstant bounds could be handled, but it would be
784 complicated. */
785 if (TREE_CODE (low) != INTEGER_CST
786 || !high || TREE_CODE (high) != INTEGER_CST)
787 return false;
789 /* Check if the intial idx is within bound. */
790 if (wi::to_widest (init) < wi::to_widest (low)
791 || wi::to_widest (init) > wi::to_widest (high))
792 return false;
794 /* The idx is always within bound. */
795 if (!step || integer_zerop (step))
796 return true;
798 if (!max_loop_iterations (loop, &niter))
799 return false;
801 if (wi::to_widest (step) < 0)
803 delta = wi::to_widest (init) - wi::to_widest (low);
804 wi_step = -wi::to_widest (step);
806 else
808 delta = wi::to_widest (high) - wi::to_widest (init);
809 wi_step = wi::to_widest (step);
812 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
813 /* The iteration space of idx is within array bound. */
814 if (!overflow && niter <= valid_niter)
815 return true;
817 return false;
820 /* Return TRUE if ref is a within bound array reference. */
822 static bool
823 ref_within_array_bound (gimple *stmt, tree ref)
825 class loop *loop = loop_containing_stmt (stmt);
827 gcc_assert (loop != NULL);
828 return for_each_index (&ref, idx_within_array_bound, loop);
832 /* Given a memory reference expression T, return TRUE if base object
833 it refers to is writable. The base object of a memory reference
834 is the main object being referenced, which is returned by function
835 get_base_address. */
837 static bool
838 base_object_writable (tree ref)
840 tree base_tree = get_base_address (ref);
842 return (base_tree
843 && DECL_P (base_tree)
844 && decl_binds_to_current_def_p (base_tree)
845 && !TREE_READONLY (base_tree));
848 /* Return true when the memory references of STMT won't trap in the
849 if-converted code. There are two things that we have to check for:
851 - writes to memory occur to writable memory: if-conversion of
852 memory writes transforms the conditional memory writes into
853 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
854 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
855 be executed at all in the original code, it may be a readonly
856 memory. To check that A is not const-qualified, we check that
857 there exists at least an unconditional write to A in the current
858 function.
860 - reads or writes to memory are valid memory accesses for every
861 iteration. To check that the memory accesses are correctly formed
862 and that we are allowed to read and write in these locations, we
863 check that the memory accesses to be if-converted occur at every
864 iteration unconditionally.
866 Returns true for the memory reference in STMT, same memory reference
867 is read or written unconditionally atleast once and the base memory
868 reference is written unconditionally once. This is to check reference
869 will not write fault. Also retuns true if the memory reference is
870 unconditionally read once then we are conditionally writing to memory
871 which is defined as read and write and is bound to the definition
872 we are seeing. */
873 static bool
874 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
876 /* If DR didn't see a reference here we can't use it to tell
877 whether the ref traps or not. */
878 if (gimple_uid (stmt) == 0)
879 return false;
881 data_reference_p *master_dr, *base_master_dr;
882 data_reference_p a = drs[gimple_uid (stmt) - 1];
884 tree base = DR_BASE_OBJECT (a);
885 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
887 gcc_assert (DR_STMT (a) == stmt);
888 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
889 || DR_INIT (a) || DR_STEP (a));
891 master_dr = innermost_DR_map->get (innermost);
892 gcc_assert (master_dr != NULL);
894 base_master_dr = baseref_DR_map->get (base);
896 /* If a is unconditionally written to it doesn't trap. */
897 if (DR_W_UNCONDITIONALLY (*master_dr))
898 return true;
900 /* If a is unconditionally accessed then ...
902 Even a is conditional access, we can treat it as an unconditional
903 one if it's an array reference and all its index are within array
904 bound. */
905 if (DR_RW_UNCONDITIONALLY (*master_dr)
906 || ref_within_array_bound (stmt, DR_REF (a)))
908 /* an unconditional read won't trap. */
909 if (DR_IS_READ (a))
910 return true;
912 /* an unconditionaly write won't trap if the base is written
913 to unconditionally. */
914 if (base_master_dr
915 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
916 return flag_store_data_races;
917 /* or the base is known to be not readonly. */
918 else if (base_object_writable (DR_REF (a)))
919 return flag_store_data_races;
922 return false;
925 /* Return true if STMT could be converted into a masked load or store
926 (conditional load or store based on a mask computed from bb predicate). */
928 static bool
929 ifcvt_can_use_mask_load_store (gimple *stmt)
931 /* Check whether this is a load or store. */
932 tree lhs = gimple_assign_lhs (stmt);
933 bool is_load;
934 tree ref;
935 if (gimple_store_p (stmt))
937 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
938 return false;
939 is_load = false;
940 ref = lhs;
942 else if (gimple_assign_load_p (stmt))
944 is_load = true;
945 ref = gimple_assign_rhs1 (stmt);
947 else
948 return false;
950 if (may_be_nonaddressable_p (ref))
951 return false;
953 /* Mask should be integer mode of the same size as the load/store
954 mode. */
955 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
956 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
957 return false;
959 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
960 return true;
962 return false;
965 /* Return true if STMT could be converted from an operation that is
966 unconditional to one that is conditional on a bb predicate mask. */
968 static bool
969 ifcvt_can_predicate (gimple *stmt)
971 basic_block bb = gimple_bb (stmt);
973 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
974 || bb->loop_father->dont_vectorize
975 || gimple_has_volatile_ops (stmt))
976 return false;
978 if (gimple_assign_single_p (stmt))
979 return ifcvt_can_use_mask_load_store (stmt);
981 tree_code code = gimple_assign_rhs_code (stmt);
982 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
983 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
984 if (!types_compatible_p (lhs_type, rhs_type))
985 return false;
986 internal_fn cond_fn = get_conditional_internal_fn (code);
987 return (cond_fn != IFN_LAST
988 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
991 /* Return true when STMT is if-convertible.
993 GIMPLE_ASSIGN statement is not if-convertible if,
994 - it is not movable,
995 - it could trap,
996 - LHS is not var decl. */
998 static bool
999 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1000 vec<data_reference_p> refs)
1002 tree lhs = gimple_assign_lhs (stmt);
1004 if (dump_file && (dump_flags & TDF_DETAILS))
1006 fprintf (dump_file, "-------------------------\n");
1007 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1010 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1011 return false;
1013 /* Some of these constrains might be too conservative. */
1014 if (stmt_ends_bb_p (stmt)
1015 || gimple_has_volatile_ops (stmt)
1016 || (TREE_CODE (lhs) == SSA_NAME
1017 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1018 || gimple_has_side_effects (stmt))
1020 if (dump_file && (dump_flags & TDF_DETAILS))
1021 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1022 return false;
1025 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1026 in between if_convertible_loop_p and combine_blocks
1027 we can perform loop versioning. */
1028 gimple_set_plf (stmt, GF_PLF_2, false);
1030 if ((! gimple_vuse (stmt)
1031 || gimple_could_trap_p_1 (stmt, false, false)
1032 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1033 && gimple_could_trap_p (stmt))
1035 if (ifcvt_can_predicate (stmt))
1037 gimple_set_plf (stmt, GF_PLF_2, true);
1038 need_to_predicate = true;
1039 return true;
1041 if (dump_file && (dump_flags & TDF_DETAILS))
1042 fprintf (dump_file, "tree could trap...\n");
1043 return false;
1045 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1046 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1047 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1048 && arith_code_with_undefined_signed_overflow
1049 (gimple_assign_rhs_code (stmt)))
1050 /* We have to rewrite stmts with undefined overflow. */
1051 need_to_rewrite_undefined = true;
1053 /* When if-converting stores force versioning, likewise if we
1054 ended up generating store data races. */
1055 if (gimple_vdef (stmt))
1056 need_to_predicate = true;
1058 return true;
/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is builtins call.

   REFS is the vector of data references collected for the loop; it is
   forwarded to the GIMPLE_ASSIGN handler.  */

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      /* Labels and debug stmts are harmless; GIMPLE_CONDs are what
	 if-conversion removes, so they are acceptable here.  */
      return true;

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
	tree fndecl = gimple_call_fndecl (stmt);
	if (fndecl)
	  {
	    /* Only const, non-looping calls can be executed
	       unconditionally without changing semantics.  */
	    int flags = gimple_call_flags (stmt);
	    if ((flags & ECF_CONST)
		&& !(flags & ECF_LOOPING_CONST_OR_PURE)
		/* We can only vectorize some builtins at the moment,
		   so restrict if-conversion to those.  */
		&& fndecl_built_in_p (fndecl))
	      return true;
	  }
	return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "don't know what to do\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      return false;
    }
}
1108 /* Assumes that BB has more than 1 predecessors.
1109 Returns false if at least one successor is not on critical edge
1110 and true otherwise. */
1112 static inline bool
1113 all_preds_critical_p (basic_block bb)
1115 edge e;
1116 edge_iterator ei;
1118 FOR_EACH_EDGE (e, ei, bb->preds)
1119 if (EDGE_COUNT (e->src->succs) == 1)
1120 return false;
1121 return true;
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* More than two successors cannot come from a simple condition.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* A control-altering call (e.g. one that may not return normally)
     cannot be predicated.  */
  gimple *last = last_stmt (bb);
  if (gcall *call = safe_dyn_cast <gcall *> (last))
    if (gimple_call_ctrl_altering_p (call))
      return false;

  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "latch is not dominated by exit_block\n");
	  return false;
	}
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
1188 /* Return true when all predecessor blocks of BB are visited. The
1189 VISITED bitmap keeps track of the visited blocks. */
1191 static bool
1192 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1194 edge e;
1195 edge_iterator ei;
1196 FOR_EACH_EDGE (e, ei, bb->preds)
1197 if (!bitmap_bit_p (*visited, e->src->index))
1198 return false;
1200 return true;
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.

   Returns NULL if the loop contains an irreducible region.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Irreducible regions cannot be if-converted: bail out and
	 release everything allocated so far.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  /* Select BB only once all its predecessors were selected;
	     the header is always selectable.  */
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      /* Rescan from the start until every block got selected; each
	 pass selects at least the blocks whose predecessors were
	 completed in the previous pass.  */
      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }

  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
/* Returns true when the analysis of the predicates for all the basic
   blocks in LOOP succeeded.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  tree c;
	  /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
	     conditions can remain unfolded because of multiple uses so
	     try to re-fold here, especially to get precision changing
	     conversions sorted out.  Do not simply fold the stmt since
	     this is analysis only.  When conditions were embedded in
	     COND_EXPRs those were folded separately before folding the
	     COND_EXPR but as they are now outside we have to make sure
	     to fold them.  Do it here - another opportunity would be to
	     fold predicates as they are inserted.  */
	  gimple_match_op cexpr (gimple_match_cond::UNCOND,
				 gimple_cond_code (stmt),
				 boolean_type_node,
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt));
	  /* Only use the simplified form when it stayed a plain
	     comparison tree code.  */
	  if (cexpr.resimplify (NULL, follow_all_ssa_edges)
	      && cexpr.code.is_tree_code ()
	      && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
	    c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
			    cexpr.ops[0], cexpr.ops[1]);
	  else
	    c = build2_loc (loc, gimple_cond_code (stmt),
			    boolean_type_node,
			    gimple_cond_lhs (stmt),
			    gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
/* Build region by adding loop pre-header and post-header blocks.
   The returned vector is <preheader, all loop blocks, postheader>;
   the caller must release it.  Relies on the loop having a single
   exit (checked earlier in if_convertible_loop_p).  */

static vec<basic_block>
build_region (class loop *loop)
{
  vec<basic_block> region = vNULL;
  basic_block exit_bb = NULL;

  gcc_assert (ifc_bbs);
  /* The first element is loop pre-header.  */
  region.safe_push (loop_preheader_edge (loop)->src);

  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      region.safe_push (bb);
      /* Find loop postheader.  */
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (loop_exit_edge_p (loop, e))
	  {
	    exit_bb = e->dest;
	    break;
	  }
    }
  /* The last element is loop post-header.  */
  gcc_assert (exit_bb);
  region.safe_push (exit_bb);
  return region;
}
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.

   Allocates the global innermost_DR_map/baseref_DR_map hash maps as a
   side effect; the caller is responsible for deleting them.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  calculate_dominance_info (CDI_DOMINATORS);

  /* First pass: check each block's CFG shape and locate the exit
     block.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Second pass: reject unsupported statement kinds and clear uids,
     which are reused below to map statements to data references.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  innermost_DR_map
	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      /* Record a 1-based DR index in the statement uid.  */
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.

   Deletes the DR hash maps allocated by if_convertible_loop_p_1
   regardless of the result.  */

static bool
if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
{
  edge e;
  edge_iterator ei;
  bool res = false;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  res = if_convertible_loop_p_1 (loop, refs);

  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}
1587 /* Return reduc_1 if has_nop.
1589 if (...)
1590 tmp1 = (unsigned type) reduc_1;
1591 tmp2 = tmp1 + rhs2;
1592 reduc_3 = (signed type) tmp2. */
1593 static tree
1594 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1596 if (!has_nop)
1597 return op;
1599 if (TREE_CODE (op) != SSA_NAME)
1600 return NULL_TREE;
1602 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1603 if (!stmt
1604 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1605 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1606 (gimple_assign_rhs1 (stmt))))
1607 return NULL_TREE;
1609 return gimple_assign_rhs1 (stmt);
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.
   *HAS_NOP is set when the reduction is wrapped in value-preserving
   conversions and *NOP_REDUC then holds the outer conversion stmt.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended, bool* has_nop,
			  gimple **nop_reduc)
{
  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  class loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = *has_nop = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* Identify which argument is the header PHI and which is the
     candidate reduction statement.  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* The result of PHI must flow back through the latch into the
     header PHI to form a reduction cycle.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);

  /* Catch something like below

     loop-header:
     reduc_1 = PHI <..., reduc_2>
     ...
     if (...)
       tmp1 = (unsigned type) reduc_1;
       tmp2 = tmp1 + rhs2;
       reduc_3 = (signed type) tmp2;

     reduc_2 = PHI <reduc_1, reduc_3>

     and convert to

     reduc_2 = PHI <0, reduc_3>
     tmp1 = (unsigned type)reduce_1;
     ifcvt = cond_expr ? rhs2 : 0
     tmp2 = tmp1 +/- ifcvt;
     reduce_1 = (signed type)tmp2;  */

  if (CONVERT_EXPR_CODE_P (reduction_op))
    {
      lhs = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (lhs) != SSA_NAME
	  || !has_single_use (lhs))
	return false;

      *nop_reduc = stmt;
      stmt = SSA_NAME_DEF_STMT (lhs);
      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
	  || !is_gimple_assign (stmt))
	return false;

      *has_nop = true;
      reduction_op = gimple_assign_rhs_code (stmt);
    }

  /* Only these operations are recognized as reductions here.  */
  if (reduction_op != PLUS_EXPR
      && reduction_op != MINUS_EXPR
      && reduction_op != MULT_EXPR
      && reduction_op != BIT_IOR_EXPR
      && reduction_op != BIT_XOR_EXPR
      && reduction_op != BIT_AND_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_nop2 == PHI_RESULT (header_phi)
      && commutative_tree_code (reduction_op))
    {
      std::swap (r_op1, r_op2);
      std::swap (r_nop1, r_nop2);
    }
  else if (r_nop1 != PHI_RESULT (header_phi))
    return false;

  if (*has_nop)
    {
      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
	    continue;
	  if (use_stmt != phi)
	    return false;
	}
    }

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  HAS_NOP/NOP_REDUC describe a wrapping value-preserving conversion
  detected by is_cond_scalar_reduction; NOP_REDUC is removed here.
  Removes REDUC from the IL as a side effect.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap,
			       bool has_nop, gimple* nop_reduc)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
  /* The value that leaves the reduction unchanged on the not-taken
     path (0 for +/-, 1 for *, etc.).  */
  tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
  gimple_seq stmts = NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? op_nochange : op1,
			    swap ? op1 : op_nochange);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement/logic_operation.  */
  rhs = gimple_build (&stmts, reduction_op,
		      TREE_TYPE (rhs1), op0, tmp);

  if (has_nop)
    {
      /* Re-apply the stripped conversion and drop the original
	 conversion statement.  */
      rhs = gimple_convert (&stmts,
			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
      stmt_it = gsi_for_stmt (nop_reduc);
      gsi_remove (&stmt_it, true);
      release_defs (nop_reduc);
    }
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
/* Produce condition for all occurrences of ARG in PHI node.  OCCUR
   holds the PHI argument indices that share the same value; the
   result is the OR of their incoming blocks' predicates, gimplified
   at GSI.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
		       gimple_stmt_iterator *gsi)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      /* An always-true predicate subsumes everything else.  */
      if (is_true_predicate (c))
	{
	  cond = c;
	  break;
	}
      c = force_gimple_operand_gsi (gsi, unshare_expr (c),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
					   NULL_TREE, true, GSI_SAME_STMT);
	}
      else
	cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
   This routine can handle PHI nodes with more than two arguments.

   For example,
     S1: A = PHI <x1(1), x2(5)>
   is converted into,
     S2: A = cond ? x1 : x2;

   The generated code is inserted at GSI that points to the top of
   basic block's statement list.
   If PHI node has more than two arguments a chain of conditional
   expression is produced.  */


static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
  gimple *new_stmt = NULL, *reduc, *nop_reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  unsigned int max, args_len;
  edge e;
  basic_block bb;
  unsigned int i;
  bool has_nop;

  res = gimple_phi_result (phi);
  /* Virtual PHIs are handled by the caller; nothing to do here.  */
  if (virtual_operand_p (res))
    return;

  /* A degenerate PHI (all arguments equal, or with a known scalar
     evolution) becomes a plain copy.  */
  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
					    res))
	  && !chrec_contains_undetermined (scev)
	  && scev != res
	  && (rhs = gimple_phi_arg_def (phi, 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Degenerate phi!\n");
	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
      return;
    }

  bb = gimple_bb (phi);
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (first_edge->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	std::swap (first_edge, second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
	{
	  cond = bb_predicate (second_edge->src);
	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	    cond = TREE_OPERAND (cond, 0);
	  else
	    first_edge = second_edge;
	}
      else
	cond = bb_predicate (first_edge->src);
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      if (EDGE_PRED (bb, 1)->src == true_bb)
	{
	  arg0 = gimple_phi_arg_def (phi, 1);
	  arg1 = gimple_phi_arg_def (phi, 0);
	}
      else
	{
	  arg0 = gimple_phi_arg_def (phi, 0);
	  arg1 = gimple_phi_arg_def (phi, 1);
	}
      if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				    &op0, &op1, false, &has_nop,
				    &nop_reduc))
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       true_bb != gimple_bb (reduc),
					       has_nop, nop_reduc);
	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
	}
      else
	/* Build new RHS using selected condition and arguments.  */
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    arg0, arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, follow_all_ssa_edges))
	{
	  new_stmt = gsi_stmt (new_gsi);
	  update_stmt (new_stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "new phi replacement stmt\n");
	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
	}
      return;
    }

  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (phi);
  int max_ind = -1;
  /* Vector of different PHI argument values.  */
  auto_vec<tree> args (num_args);

  /* Compute phi_arg_map.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (phi, i);
      if (!phi_arg_map.get (arg))
	args.quick_push (arg);
      phi_arg_map.get_or_insert (arg).safe_push (i);
    }

  /* Determine element with max number of occurrences.  */
  max_ind = -1;
  max = 1;
  args_len = args.length ();
  for (i = 0; i < args_len; i++)
    {
      unsigned int len;
      if ((len = phi_arg_map.get (args[i])->length ()) > max)
	{
	  max_ind = (int) i;
	  max = len;
	}
    }

  /* Put element with max number of occurences to the end of ARGS.  */
  if (max_ind != -1 && max_ind +1 != (int) args_len)
    std::swap (args[args_len - 1], args[max_ind]);

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
    {
      vec<int> *indexes;
      indexes = phi_arg_map.get (args[0]);
      index0 = (*indexes)[0];
      arg0 = args[0];
      arg1 = args[1];
      e = gimple_phi_arg_edge (phi, index0);
      cond = bb_predicate (e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
				      &op0, &op1, true, &has_nop, &nop_reduc)))
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    swap? arg1 : arg0,
				    swap? arg0 : arg1);
      else
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       swap,has_nop, nop_reduc);
	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
    }
  else
    {
      /* Common case.  Build a chain of COND_EXPRs, one per distinct
	 argument value; the final one defines RES.  */
      vec<int> *indexes;
      tree type = TREE_TYPE (gimple_phi_result (phi));
      tree lhs;
      arg1 = args[1];
      for (i = 0; i < args_len; i++)
	{
	  arg0 = args[i];
	  indexes = phi_arg_map.get (args[i]);
	  if (i != args_len - 1)
	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
	  else
	    lhs = res;
	  cond = gen_phi_arg_condition (phi, indexes, gsi);
	  rhs = fold_build_cond_expr (type, unshare_expr (cond),
				      arg0, arg1);
	  new_stmt = gimple_build_assign (lhs, rhs);
	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
	  update_stmt (new_stmt);
	  arg1 = lhs;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "new extended phi replacement stmt\n");
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
   LOOP->header block with conditional modify expressions.  */

static void
predicate_all_scalar_phis (class loop *loop)
{
  basic_block bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;

  /* Start at 1: ifc_bbs[0] is processed like any other block, but
     the header (skipped below) keeps its PHIs.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gphi *phi;
      gimple_stmt_iterator gsi;
      gphi_iterator phi_gsi;
      bb = ifc_bbs[i];

      if (bb == loop->header)
	continue;

      phi_gsi = gsi_start_phis (bb);
      if (gsi_end_p (phi_gsi))
	continue;

      gsi = gsi_after_labels (bb);
      /* Only advance PHI_GSI manually when the PHI is kept;
	 remove_phi_node advances it for the removed ones.  */
      while (!gsi_end_p (phi_gsi))
	{
	  phi = phi_gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    gsi_next (&phi_gsi);
	  else
	    {
	      predicate_scalar_phi (phi, &gsi);
	      remove_phi_node (&phi_gsi, false);
	    }
	}
    }
}
2152 /* Insert in each basic block of LOOP the statements produced by the
2153 gimplification of the predicates. */
2155 static void
2156 insert_gimplified_predicates (loop_p loop)
2158 unsigned int i;
2160 for (i = 0; i < loop->num_nodes; i++)
2162 basic_block bb = ifc_bbs[i];
2163 gimple_seq stmts;
2164 if (!is_predicated (bb))
2165 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2166 if (!is_predicated (bb))
2168 /* Do not insert statements for a basic block that is not
2169 predicated. Also make sure that the predicate of the
2170 basic block is set to true. */
2171 reset_bb_predicate (bb);
2172 continue;
2175 stmts = bb_predicate_gimplified_stmts (bb);
2176 if (stmts)
2178 if (need_to_predicate)
2180 /* Insert the predicate of the BB just after the label,
2181 as the if-conversion of memory writes will use this
2182 predicate. */
2183 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2184 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2186 else
2188 /* Insert the predicate of the BB at the end of the BB
2189 as this would reduce the register pressure: the only
2190 use of this predicate will be in successor BBs. */
2191 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2193 if (gsi_end_p (gsi)
2194 || stmt_ends_bb_p (gsi_stmt (gsi)))
2195 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2196 else
2197 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2200 /* Once the sequence is code generated, set it to NULL. */
2201 set_bb_predicate_gimplified_stmts (bb, NULL);
2206 /* Helper function for predicate_statements. Returns index of existent
2207 mask if it was created for given SIZE and -1 otherwise. */
2209 static int
2210 mask_exists (int size, const vec<int> &vec)
2212 unsigned int ix;
2213 int v;
2214 FOR_EACH_VEC_ELT (vec, ix, v)
2215 if (v == size)
2216 return (int) ix;
2217 return -1;
/* Helper function for predicate_statements.  STMT is a memory read or
   write and it needs to be predicated by MASK.  Return a statement
   that does so: an IFN_MASK_LOAD for reads (LHS is an SSA name) or an
   IFN_MASK_STORE for writes.  The original STMT is not removed here.  */

static gimple *
predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
{
  gcall *new_stmt;

  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  /* The memory reference is the RHS for a load, the LHS for a store.  */
  tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
  mark_addressable (ref);
  tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
					true, NULL_TREE, true, GSI_SAME_STMT);
  /* PTR encodes the alias type and the known alignment of REF.  */
  tree ptr = build_int_cst (reference_alias_ptr_type (ref),
			    get_object_alignment (ref));
  /* Copy points-to info if possible.  */
  if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
    copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
		   ref);
  if (TREE_CODE (lhs) == SSA_NAME)
    {
      new_stmt
	= gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
				      ptr, mask);
      gimple_call_set_lhs (new_stmt, lhs);
      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
    }
  else
    {
      new_stmt
	= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
				      mask, rhs);
      gimple_move_vops (new_stmt, stmt);
    }
  gimple_call_set_nothrow (new_stmt, true);
  return new_stmt;
}
2260 /* STMT uses OP_LHS. Check whether it is equivalent to:
2262 ... = OP_MASK ? OP_LHS : X;
2264 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2265 known to have value OP_COND. */
2267 static tree
2268 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2269 tree op_lhs)
2271 gassign *assign = dyn_cast <gassign *> (stmt);
2272 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2273 return NULL_TREE;
2275 tree use_cond = gimple_assign_rhs1 (assign);
2276 tree if_true = gimple_assign_rhs2 (assign);
2277 tree if_false = gimple_assign_rhs3 (assign);
/* Same condition (either the SSA mask itself or a structurally equal
   expression) selecting OP_LHS on the true arm: X = COND ? LHS : ELSE.  */
2279 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2280 && if_true == op_lhs)
2281 return if_false;
/* Inverted condition selecting OP_LHS on the false arm is equivalent.  */
2283 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2284 return if_true;
2286 return NULL_TREE;
2289 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2290 the set of SSA names defined earlier in STMT's block. */
2292 static bool
2293 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2294 tree value)
2296 if (is_gimple_min_invariant (value))
2297 return true;
2299 if (TREE_CODE (value) == SSA_NAME)
2301 if (SSA_NAME_IS_DEFAULT_DEF (value))
2302 return true;
2304 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2305 basic_block use_bb = gimple_bb (stmt);
/* Within the same block rely on the caller-maintained set of names
   defined before STMT; across blocks, dominance is sufficient.  */
2306 return (def_bb == use_bb
2307 ? ssa_names->contains (value)
2308 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2311 return false;
2314 /* Helper function for predicate_statements. STMT is a potentially-trapping
2315 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2316 has value COND. Return a statement that does so. SSA_NAMES is the set of
2317 SSA names defined earlier in STMT's block. */
2319 static gimple *
2320 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2321 hash_set<tree_ssa_name_hash> *ssa_names)
2323 tree lhs = gimple_assign_lhs (stmt);
2324 tree_code code = gimple_assign_rhs_code (stmt);
2325 unsigned int nops = gimple_num_ops (stmt);
2326 internal_fn cond_fn = get_conditional_internal_fn (code);
2328 /* Construct the arguments to the conditional internal function. */
2329 auto_vec<tree, 8> args;
2330 args.safe_grow (nops + 1, true)
2331 args[0] = mask;
2332 for (unsigned int i = 1; i < nops; ++i)
2333 args[i] = gimple_op (stmt, i);
/* The trailing slot is the "else" value; left NULL until we find (or
   synthesize) one below.  */
2334 args[nops] = NULL_TREE;
2336 /* Look for uses of the result to see whether they are COND_EXPRs that can
2337 be folded into the conditional call. */
2338 imm_use_iterator imm_iter;
2339 gimple *use_stmt;
2340 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2342 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2343 if (new_else && value_available_p (stmt, ssa_names, new_else))
/* First qualifying else value wins; later uses only fold away if
   they agree with it.  */
2345 if (!args[nops])
2346 args[nops] = new_else;
2347 if (operand_equal_p (new_else, args[nops], 0))
2349 /* We have:
2351 LHS = IFN_COND (MASK, ..., ELSE);
2352 X = MASK ? LHS : ELSE;
2354 which makes X equivalent to LHS. */
2355 tree use_lhs = gimple_assign_lhs (use_stmt);
2356 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
/* No use supplied an else value: ask the target for its preferred one.  */
2360 if (!args[nops])
2361 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2362 nops - 1, &args[1]);
2364 /* Create and insert the call. */
2365 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2366 gimple_call_set_lhs (new_stmt, lhs);
2367 gimple_call_set_nothrow (new_stmt, true);
2369 return new_stmt;
2372 /* Predicate each write to memory in LOOP.
2374 This function transforms control flow constructs containing memory
2375 writes of the form:
2377 | for (i = 0; i < N; i++)
2378 | if (cond)
2379 | A[i] = expr;
2381 into the following form that does not contain control flow:
2383 | for (i = 0; i < N; i++)
2384 | A[i] = cond ? expr : A[i];
2386 The original CFG looks like this:
2388 | bb_0
2389 | i = 0
2390 | end_bb_0
2392 | bb_1
2393 | if (i < N) goto bb_5 else goto bb_2
2394 | end_bb_1
2396 | bb_2
2397 | cond = some_computation;
2398 | if (cond) goto bb_3 else goto bb_4
2399 | end_bb_2
2401 | bb_3
2402 | A[i] = expr;
2403 | goto bb_4
2404 | end_bb_3
2406 | bb_4
2407 | goto bb_1
2408 | end_bb_4
2410 insert_gimplified_predicates inserts the computation of the COND
2411 expression at the beginning of the destination basic block:
2413 | bb_0
2414 | i = 0
2415 | end_bb_0
2417 | bb_1
2418 | if (i < N) goto bb_5 else goto bb_2
2419 | end_bb_1
2421 | bb_2
2422 | cond = some_computation;
2423 | if (cond) goto bb_3 else goto bb_4
2424 | end_bb_2
2426 | bb_3
2427 | cond = some_computation;
2428 | A[i] = expr;
2429 | goto bb_4
2430 | end_bb_3
2432 | bb_4
2433 | goto bb_1
2434 | end_bb_4
2436 predicate_statements is then predicating the memory write as follows:
2438 | bb_0
2439 | i = 0
2440 | end_bb_0
2442 | bb_1
2443 | if (i < N) goto bb_5 else goto bb_2
2444 | end_bb_1
2446 | bb_2
2447 | if (cond) goto bb_3 else goto bb_4
2448 | end_bb_2
2450 | bb_3
2451 | cond = some_computation;
2452 | A[i] = cond ? expr : A[i];
2453 | goto bb_4
2454 | end_bb_3
2456 | bb_4
2457 | goto bb_1
2458 | end_bb_4
2460 and finally combine_blocks removes the basic block boundaries making
2461 the loop vectorizable:
2463 | bb_0
2464 | i = 0
2465 | if (i < N) goto bb_5 else goto bb_1
2466 | end_bb_0
2468 | bb_1
2469 | cond = some_computation;
2470 | A[i] = cond ? expr : A[i];
2471 | if (i < N) goto bb_5 else goto bb_4
2472 | end_bb_1
2474 | bb_4
2475 | goto bb_1
2476 | end_bb_4
2479 static void
2480 predicate_statements (loop_p loop)
2482 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2483 auto_vec<int, 1> vect_sizes;
2484 auto_vec<tree, 1> vect_masks;
2485 hash_set<tree_ssa_name_hash> ssa_names;
/* Skip the loop header (index 0): it is executed unconditionally.  */
2487 for (i = 1; i < orig_loop_num_nodes; i++)
2489 gimple_stmt_iterator gsi;
2490 basic_block bb = ifc_bbs[i];
2491 tree cond = bb_predicate (bb);
2492 bool swap;
2493 int index;
2495 if (is_true_predicate (cond))
2496 continue;
/* Strip a top-level negation and remember it; SWAP later inverts
   either the generated mask or the cond-expr arms.  */
2498 swap = false;
2499 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2501 swap = true;
2502 cond = TREE_OPERAND (cond, 0);
/* Masks are cached per basic block only.  */
2505 vect_sizes.truncate (0);
2506 vect_masks.truncate (0);
2508 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2510 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2511 tree lhs;
2512 if (!stmt)
/* Stores guarded by a provably-false predicate are dead: remove.  */
2514 else if (is_false_predicate (cond)
2515 && gimple_vdef (stmt))
2517 unlink_stmt_vdef (stmt);
2518 gsi_remove (&gsi, true);
2519 release_defs (stmt);
2520 continue;
/* GF_PLF_2 was set earlier on statements that must be masked
   (loads/stores and potentially-trapping arithmetic).  */
2522 else if (gimple_plf (stmt, GF_PLF_2))
2524 tree lhs = gimple_assign_lhs (stmt);
2525 tree mask;
2526 gimple *new_stmt;
2527 gimple_seq stmts = NULL;
2528 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2529 /* We checked before setting GF_PLF_2 that an equivalent
2530 integer mode exists. */
2531 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2532 if (!vect_sizes.is_empty ()
2533 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2534 /* Use created mask. */
2535 mask = vect_masks[index];
2536 else
2538 if (COMPARISON_CLASS_P (cond))
2539 mask = gimple_build (&stmts, TREE_CODE (cond),
2540 boolean_type_node,
2541 TREE_OPERAND (cond, 0),
2542 TREE_OPERAND (cond, 1));
2543 else
2544 mask = cond;
2546 if (swap)
2548 tree true_val
2549 = constant_boolean_node (true, TREE_TYPE (mask));
/* mask ^ true == !mask, expressed without TRUTH_NOT_EXPR.  */
2550 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2551 TREE_TYPE (mask), mask, true_val);
2553 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2555 /* Save mask and its size for further use. */
2556 vect_sizes.safe_push (bitsize);
2557 vect_masks.safe_push (mask);
2559 if (gimple_assign_single_p (stmt))
2560 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2561 else
2562 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2564 gsi_replace (&gsi, new_stmt, true);
/* Arithmetic with undefined signed overflow must be rewritten to a
   wrapping form, since it now executes unconditionally.  */
2566 else if (((lhs = gimple_assign_lhs (stmt)), true)
2567 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2568 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2569 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2570 && arith_code_with_undefined_signed_overflow
2571 (gimple_assign_rhs_code (stmt)))
2573 gsi_remove (&gsi, true);
2574 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2575 bool first = true;
2576 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2577 !gsi_end_p (gsi2);)
2579 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2580 gsi_remove (&gsi2, false);
2581 if (first)
2583 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2584 first = false;
2586 else
2587 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
/* Remaining stores: turn `MEM = RHS' into `MEM = COND ? RHS : MEM'
   so the write becomes unconditional but value-preserving.  */
2590 else if (gimple_vdef (stmt))
2592 tree lhs = gimple_assign_lhs (stmt);
2593 tree rhs = gimple_assign_rhs1 (stmt);
2594 tree type = TREE_TYPE (lhs);
2596 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2597 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2598 if (swap)
2599 std::swap (lhs, rhs);
2600 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2601 NULL_TREE, true, GSI_SAME_STMT);
2602 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2603 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2604 update_stmt (stmt);
/* Track SSA names defined so far in this block for
   value_available_p.  */
2606 lhs = gimple_get_lhs (gsi_stmt (gsi));
2607 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2608 ssa_names.add (lhs);
2609 gsi_next (&gsi);
2611 ssa_names.empty ();
2615 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2616 other than the exit and latch of the LOOP. Also resets the
2617 GIMPLE_DEBUG information. */
2619 static void
2620 remove_conditions_and_labels (loop_p loop)
2622 gimple_stmt_iterator gsi;
2623 unsigned int i;
2625 for (i = 0; i < loop->num_nodes; i++)
2627 basic_block bb = ifc_bbs[i];
/* The exit test and the latch keep their control flow; everything
   else will be merged into straight-line code.  */
2629 if (bb_with_exit_edge_p (loop, bb)
2630 || bb == loop->latch)
2631 continue;
2633 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2634 switch (gimple_code (gsi_stmt (gsi)))
2636 case GIMPLE_COND:
2637 case GIMPLE_LABEL:
2638 gsi_remove (&gsi, true);
2639 break;
2641 case GIMPLE_DEBUG:
2642 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2643 if (gimple_debug_bind_p (gsi_stmt (gsi)))
/* Reset rather than remove: keeps the binding point while
   dropping the now possibly-wrong value.  */
2645 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2646 update_stmt (gsi_stmt (gsi));
2648 gsi_next (&gsi);
2649 break;
2651 default:
2652 gsi_next (&gsi);
2657 /* Combine all the basic blocks from LOOP into one or two super basic
2658 blocks. Replace PHI nodes with conditional modify expressions. */
2660 static void
2661 combine_blocks (class loop *loop)
2663 basic_block bb, exit_bb, merge_target_bb;
2664 unsigned int orig_loop_num_nodes = loop->num_nodes;
2665 unsigned int i;
2666 edge e;
2667 edge_iterator ei;
2669 remove_conditions_and_labels (loop);
2670 insert_gimplified_predicates (loop);
2671 predicate_all_scalar_phis (loop);
2673 if (need_to_predicate || need_to_rewrite_undefined)
2674 predicate_statements (loop);
2676 /* Merge basic blocks. */
2677 exit_bb = NULL;
/* Record per-block predication before freeing the predicates, so we
   can later reset flow-sensitive info only where needed.  */
2678 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2679 for (i = 0; i < orig_loop_num_nodes; i++)
2681 bb = ifc_bbs[i];
2682 predicated[i] = !is_true_predicate (bb_predicate (bb));
2683 free_bb_predicate (bb);
2684 if (bb_with_exit_edge_p (loop, bb))
2686 gcc_assert (exit_bb == NULL);
2687 exit_bb = bb;
2690 gcc_assert (exit_bb != loop->latch);
2692 merge_target_bb = loop->header;
2694 /* Get at the virtual def valid for uses starting at the first block
2695 we merge into the header. Without a virtual PHI the loop has the
2696 same virtual use on all stmts. */
2697 gphi *vphi = get_virtual_phi (loop->header);
2698 tree last_vdef = NULL_TREE;
2699 if (vphi)
2701 last_vdef = gimple_phi_result (vphi);
2702 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2703 ! gsi_end_p (gsi); gsi_next (&gsi))
2704 if (gimple_vdef (gsi_stmt (gsi)))
2705 last_vdef = gimple_vdef (gsi_stmt (gsi));
2707 for (i = 1; i < orig_loop_num_nodes; i++)
2709 gimple_stmt_iterator gsi;
2710 gimple_stmt_iterator last;
2712 bb = ifc_bbs[i];
2714 if (bb == exit_bb || bb == loop->latch)
2715 continue;
2717 /* We release virtual PHIs late because we have to propagate them
2718 out using the current VUSE. The def might be the one used
2719 after the loop. */
2720 vphi = get_virtual_phi (bb);
2721 if (vphi)
2723 /* When there's just loads inside the loop a stray virtual
2724 PHI merging the uses can appear, update last_vdef from
2725 it. */
2726 if (!last_vdef)
2727 last_vdef = gimple_phi_arg_def (vphi, 0);
2728 imm_use_iterator iter;
2729 use_operand_p use_p;
2730 gimple *use_stmt;
2731 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2733 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2734 SET_USE (use_p, last_vdef);
2736 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2737 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2738 gsi = gsi_for_stmt (vphi);
2739 remove_phi_node (&gsi, true);
2742 /* Make stmts member of loop->header and clear range info from all stmts
2743 in BB which is now no longer executed conditional on a predicate we
2744 could have derived it from. */
2745 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2747 gimple *stmt = gsi_stmt (gsi);
2748 gimple_set_bb (stmt, merge_target_bb);
2749 /* Update virtual operands. */
2750 if (last_vdef)
2752 use_operand_p use_p = ssa_vuse_operand (stmt);
2753 if (use_p
2754 && USE_FROM_PTR (use_p) != last_vdef)
2755 SET_USE (use_p, last_vdef);
2756 if (gimple_vdef (stmt))
2757 last_vdef = gimple_vdef (stmt);
2759 else
2760 /* If this is the first load we arrive at update last_vdef
2761 so we handle stray PHIs correctly. */
2762 last_vdef = gimple_vuse (stmt);
2763 if (predicated[i])
2765 ssa_op_iter i;
2766 tree op;
2767 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2768 reset_flow_sensitive_info (op);
2772 /* Update stmt list. */
2773 last = gsi_last_bb (merge_target_bb);
2774 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2775 set_bb_seq (bb, NULL);
2778 /* Fixup virtual operands in the exit block. */
2779 if (exit_bb
2780 && exit_bb != loop->header)
2782 /* We release virtual PHIs late because we have to propagate them
2783 out using the current VUSE. The def might be the one used
2784 after the loop. */
2785 vphi = get_virtual_phi (exit_bb)
2786 if (vphi)
2788 /* When there's just loads inside the loop a stray virtual
2789 PHI merging the uses can appear, update last_vdef from
2790 it. */
2791 if (!last_vdef)
2792 last_vdef = gimple_phi_arg_def (vphi, 0);
2793 imm_use_iterator iter;
2794 use_operand_p use_p;
2795 gimple *use_stmt;
2796 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2798 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2799 SET_USE (use_p, last_vdef);
2801 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2802 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2803 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2804 remove_phi_node (&gsi, true);
2808 /* Now remove all the edges in the loop, except for those from the exit
2809 block and delete the blocks we elided. */
2810 for (i = 1; i < orig_loop_num_nodes; i++)
2812 bb = ifc_bbs[i];
2814 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2816 if (e->src == exit_bb)
2817 ei_next (&ei);
2818 else
2819 remove_edge (e);
2822 for (i = 1; i < orig_loop_num_nodes; i++)
2824 bb = ifc_bbs[i];
2826 if (bb == exit_bb || bb == loop->latch)
2827 continue;
2829 delete_basic_block (bb);
2832 /* Re-connect the exit block. */
2833 if (exit_bb != NULL)
2835 if (exit_bb != loop->header)
2837 /* Connect this node to loop header. */
2838 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2839 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2842 /* Redirect non-exit edges to loop->latch. */
2843 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2845 if (!loop_exit_edge_p (loop, e))
2846 redirect_edge_and_branch (e, loop->latch);
2848 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2850 else
2852 /* If the loop does not have an exit, reconnect header and latch. */
2853 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2854 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2857 /* If possible, merge loop header to the block with the exit edge.
2858 This reduces the number of basic blocks to two, to please the
2859 vectorizer that handles only loops with two nodes. */
2860 if (exit_bb
2861 && exit_bb != loop->header)
2863 if (can_merge_blocks_p (loop->header, exit_bb))
2864 merge_blocks (loop->header, exit_bb);
2867 free (ifc_bbs);
2868 ifc_bbs = NULL;
2869 free (predicated);
2872 /* Version LOOP before if-converting it; the original loop
2873 will be if-converted, the new copy of the loop will not,
2874 and the LOOP_VECTORIZED internal call will be guarding which
2875 loop to execute. The vectorizer pass will fold this
2876 internal call into either true or false.
2878 Note that this function intentionally invalidates profile. Both edges
2879 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2880 consistent after the condition is folded in the vectorizer. */
2882 static class loop *
2883 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
2885 basic_block cond_bb;
2886 tree cond = make_ssa_name (boolean_type_node);
2887 class loop *new_loop;
2888 gimple *g;
2889 gimple_stmt_iterator gsi;
2890 unsigned int save_length = 0;
/* The second argument (new loop number) is patched in below once the
   copy exists.  */
2892 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2893 build_int_cst (integer_type_node, loop->num),
2894 integer_zero_node);
2895 gimple_call_set_lhs (g, cond);
2897 void **saved_preds = NULL;
2898 if (any_complicated_phi || need_to_predicate)
2900 /* Save BB->aux around loop_version as that uses the same field. */
2901 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2902 saved_preds = XALLOCAVEC (void *, save_length);
2903 for (unsigned i = 0; i < save_length; i++)
2904 saved_preds[i] = ifc_bbs[i]->aux;
2907 initialize_original_copy_tables ();
2908 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
2909 is re-merged in the vectorizer. */
2910 new_loop = loop_version (loop, cond, &cond_bb,
2911 profile_probability::always (),
2912 profile_probability::always (),
2913 profile_probability::always (),
2914 profile_probability::always (), true);
2915 free_original_copy_tables ();
2917 if (any_complicated_phi || need_to_predicate)
2918 for (unsigned i = 0; i < save_length; i++)
2919 ifc_bbs[i]->aux = saved_preds[i];
2921 if (new_loop == NULL)
2922 return NULL;
/* The copy is kept scalar; only the original loop gets if-converted
   and offered to the vectorizer.  */
2924 new_loop->dont_vectorize = true;
2925 new_loop->force_vectorize = false;
2926 gsi = gsi_last_bb (cond_bb);
2927 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2928 if (preds)
2929 preds->safe_push (g);
2930 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2931 update_ssa (TODO_update_ssa_no_phi);
2932 return new_loop;
2935 /* Return true when LOOP satisfies the follow conditions that will
2936 allow it to be recognized by the vectorizer for outer-loop
2937 vectorization:
2938 - The loop is not the root node of the loop tree.
2939 - The loop has exactly one inner loop.
2940 - The loop has a single exit.
2941 - The loop header has a single successor, which is the inner
2942 loop header.
2943 - Each of the inner and outer loop latches have a single
2944 predecessor.
2945 - The loop exit block has a single predecessor, which is the
2946 inner loop's exit block. */
2948 static bool
2949 versionable_outer_loop_p (class loop *loop)
2951 if (!loop_outer (loop)
2952 || loop->dont_vectorize
2953 || !loop->inner
2954 || loop->inner->next
2955 || !single_exit (loop)
2956 || !single_succ_p (loop->header)
2957 || single_succ (loop->header) != loop->inner->header
2958 || !single_pred_p (loop->latch)
2959 || !single_pred_p (loop->inner->latch))
2960 return false;
/* The outer exit block must be fed solely by the inner loop's exit.  */
2962 basic_block outer_exit = single_pred (loop->latch);
2963 basic_block inner_exit = single_pred (loop->inner->latch);
2965 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2966 return false;
2968 if (dump_file)
2969 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2971 return true;
2974 /* Performs splitting of critical edges. Skip splitting and return false
2975 if LOOP will not be converted because:
2977 - LOOP is not well formed.
2978 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2980 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
2982 static bool
2983 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
2985 basic_block *body;
2986 basic_block bb;
2987 unsigned int num = loop->num_nodes;
2988 unsigned int i;
2989 gimple *stmt;
2990 edge e;
2991 edge_iterator ei;
2992 auto_vec<edge> critical_edges;
2994 /* Loop is not well formed. */
2995 if (loop->inner)
2996 return false;
2998 body = get_loop_body (loop);
2999 for (i = 0; i < num; i++)
3001 bb = body[i];
3002 if (!aggressive_if_conv
3003 && phi_nodes (bb)
3004 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3006 if (dump_file && (dump_flags & TDF_DETAILS))
3007 fprintf (dump_file,
3008 "BB %d has complicated PHI with more than %u args.\n",
3009 bb->index, MAX_PHI_ARG_NUM);
3011 free (body);
3012 return false;
3014 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3015 continue;
3017 stmt = last_stmt (bb);
3018 /* Skip basic blocks not ending with conditional branch. */
3019 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
3020 continue;
/* Only edges staying inside this loop matter for predication.  */
3022 FOR_EACH_EDGE (e, ei, bb->succs)
3023 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3024 critical_edges.safe_push (e);
3026 free (body);
3028 while (critical_edges.length () > 0)
3030 e = critical_edges.pop ();
3031 /* Don't split if bb can be predicated along non-critical edge. */
3032 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3033 split_edge (e);
3036 return true;
3039 /* Delete redundant statements produced by predication which prevents
3040 loop vectorization. */
3042 static void
3043 ifcvt_local_dce (class loop *loop)
3045 gimple *stmt;
3046 gimple *stmt1;
3047 gimple *phi;
3048 gimple_stmt_iterator gsi;
3049 auto_vec<gimple *> worklist;
3050 enum gimple_code code;
3051 use_operand_p use_p;
3052 imm_use_iterator imm_iter;
3054 /* The loop has a single BB only. */
3055 basic_block bb = loop->header;
3056 tree latch_vdef = NULL_TREE;
3058 worklist.create (64);
/* Mark phase: GF_PLF_2 marks a statement live.  */
3059 /* Consider all phi as live statements. */
3060 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3062 phi = gsi_stmt (gsi);
3063 gimple_set_plf (phi, GF_PLF_2, true);
3064 worklist.safe_push (phi);
/* Remember the VDEF reaching the latch for dead-store classification
   below.  */
3065 if (virtual_operand_p (gimple_phi_result (phi)))
3066 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3068 /* Consider load/store statements, CALL and COND as live. */
3069 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3071 stmt = gsi_stmt (gsi);
3072 if (is_gimple_debug (stmt))
3074 gimple_set_plf (stmt, GF_PLF_2, true);
3075 continue;
3077 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3079 gimple_set_plf (stmt, GF_PLF_2, true);
3080 worklist.safe_push (stmt);
3081 continue;
3083 code = gimple_code (stmt);
3084 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3086 gimple_set_plf (stmt, GF_PLF_2, true);
3087 worklist.safe_push (stmt);
3088 continue;
3090 gimple_set_plf (stmt, GF_PLF_2, false);
/* An assignment whose result is used outside this block (i.e. after
   the loop) must also stay live.  */
3092 if (code == GIMPLE_ASSIGN)
3094 tree lhs = gimple_assign_lhs (stmt);
3095 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3097 stmt1 = USE_STMT (use_p);
3098 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3100 gimple_set_plf (stmt, GF_PLF_2, true);
3101 worklist.safe_push (stmt);
3102 break;
3107 /* Propagate liveness through arguments of live stmt. */
3108 while (worklist.length () > 0)
3110 ssa_op_iter iter;
3111 use_operand_p use_p;
3112 tree use;
3114 stmt = worklist.pop ();
3115 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3117 use = USE_FROM_PTR (use_p);
3118 if (TREE_CODE (use) != SSA_NAME)
3119 continue;
3120 stmt1 = SSA_NAME_DEF_STMT (use);
3121 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3122 continue;
3123 gimple_set_plf (stmt1, GF_PLF_2, true);
3124 worklist.safe_push (stmt1);
/* Sweep phase: walk backwards so removing a statement does not
   invalidate the saved previous iterator.  */
3127 /* Delete dead statements. */
3128 gsi = gsi_last_bb (bb);
3129 while (!gsi_end_p (gsi))
3131 gimple_stmt_iterator gsiprev = gsi;
3132 gsi_prev (&gsiprev);
3133 stmt = gsi_stmt (gsi);
3134 if (gimple_store_p (stmt) && gimple_vdef (stmt))
3136 tree lhs = gimple_get_lhs (stmt);
3137 ao_ref write;
3138 ao_ref_init (&write, lhs);
3140 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3141 == DSE_STORE_DEAD)
3142 delete_dead_or_redundant_assignment (&gsi, "dead");
3143 gsi = gsiprev;
3144 continue;
3147 if (gimple_plf (stmt, GF_PLF_2))
3149 gsi = gsiprev;
3150 continue;
3152 if (dump_file && (dump_flags & TDF_DETAILS))
3154 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3155 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3157 gsi_remove (&gsi, true);
3158 release_defs (stmt);
3159 gsi = gsiprev;
3163 /* Return true if VALUE is already available on edge PE. */
3165 static bool
3166 ifcvt_available_on_edge_p (edge pe, tree value)
3168 if (is_gimple_min_invariant (value))
3169 return true;
3171 if (TREE_CODE (value) == SSA_NAME)
/* A NULL def_bb means a default definition, which is available
   everywhere.  */
3173 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3174 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3175 return true;
3178 return false;
3181 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3182 edge PE. */
3184 static bool
3185 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
/* Never hoist masked internal calls or COND_EXPRs: they encode the
   loop predicate itself.  */
3187 if (auto *call = dyn_cast<gcall *> (stmt))
3189 if (gimple_call_internal_p (call)
3190 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3191 return false;
3193 else if (auto *assign = dyn_cast<gassign *> (stmt))
3195 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3196 return false;
3198 else
3199 return false;
3201 if (gimple_has_side_effects (stmt)
3202 || gimple_could_trap_p (stmt)
3203 || stmt_could_throw_p (cfun, stmt)
3204 || gimple_vdef (stmt)
3205 || gimple_vuse (stmt))
3206 return false;
3208 int num_args = gimple_num_args (stmt);
/* For a non-preheader edge the arguments must already dominate the
   edge destination; for the preheader it suffices that they are
   loop-invariant.  */
3209 if (pe != loop_preheader_edge (loop))
3211 for (int i = 0; i < num_args; ++i)
3212 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3213 return false;
3215 else
3217 for (int i = 0; i < num_args; ++i)
3218 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3219 return false;
3222 return true;
3225 /* Hoist invariant statements from LOOP to edge PE. */
3227 static void
3228 ifcvt_hoist_invariants (class loop *loop, edge pe)
3230 gimple_stmt_iterator hoist_gsi = {};
3231 unsigned int num_blocks = loop->num_nodes;
3232 basic_block *body = get_loop_body (loop);
3233 for (unsigned int i = 0; i < num_blocks; ++i)
3234 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3236 gimple *stmt = gsi_stmt (gsi);
3237 if (ifcvt_can_hoist (loop, pe, stmt))
3239 /* Once we've hoisted one statement, insert other statements
3240 after it. */
3241 gsi_remove (&gsi, false);
3242 if (hoist_gsi.ptr)
3243 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3244 else
/* First hoisted statement: materialize it on the edge and
   remember its position for subsequent insertions.  */
3246 gsi_insert_on_edge_immediate (pe, stmt);
3247 hoist_gsi = gsi_for_stmt (stmt);
3249 continue;
3251 gsi_next (&gsi);
3253 free (body);
3256 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3257 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3258 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3259 if not NULL, will hold the tree representing the base struct of this
3260 bitfield. */
3262 static tree
3263 get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3264 tree *struct_expr)
/* For a write the COMPONENT_REF is on the lhs, for a read on the rhs.  */
3266 tree comp_ref = write ? gimple_assign_lhs (stmt)
3267 : gimple_assign_rhs1 (stmt);
3269 tree field_decl = TREE_OPERAND (comp_ref, 1);
3270 tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3272 /* Bail out if the representative is BLKmode as we will not be able to
3273 vectorize this. */
3274 if (TYPE_MODE (TREE_TYPE (rep_decl)) == E_BLKmode)
3275 return NULL_TREE;
3277 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3278 precision. */
3279 unsigned HOST_WIDE_INT bf_prec
3280 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3281 if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3282 return NULL_TREE;
3284 if (struct_expr)
3285 *struct_expr = TREE_OPERAND (comp_ref, 0);
3287 if (bitpos)
3289 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3290 where our bitfield starts in relation to the container REP_DECL. The
3291 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3292 us how many bytes from the start of the structure there are until the
3293 start of the group of bitfield members the FIELD_DECL belongs to,
3294 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3295 position our actual bitfield member starts. For the container
3296 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3297 us the distance between the start of the structure and the start of
3298 the container, though the first is in bytes and the later other in
3299 bits. With this in mind we calculate the bit position of our new
3300 BITFIELD_REF by subtracting the number of bits between the start of
3301 the structure and the container from the number of bits from the start
3302 of the structure and the actual bitfield member. */
3303 tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3304 DECL_FIELD_OFFSET (field_decl),
3305 build_int_cst (bitsizetype, BITS_PER_UNIT));
3306 bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3307 DECL_FIELD_BIT_OFFSET (field_decl));
3308 tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3309 DECL_FIELD_OFFSET (rep_decl),
3310 build_int_cst (bitsizetype, BITS_PER_UNIT));
3311 rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3312 DECL_FIELD_BIT_OFFSET (rep_decl));
3314 *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3317 return rep_decl;
3321 /* Lowers the bitfield described by DATA.
3322 For a write like:
3324 struct.bf = _1;
3326 lower to:
3328 __ifc_1 = struct.<representative>;
3329 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3330 struct.<representative> = __ifc_2;
3332 For a read:
3334 _1 = struct.bf;
3336 lower to:
3338 __ifc_1 = struct.<representative>;
3339 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3341 where representative is a legal load that contains the bitfield value,
3342 bitsize is the size of the bitfield and bitpos the offset to the start of
3343 the bitfield within the representative. */
3345 static void
3346 lower_bitfield (gassign *stmt, bool write)
3348 tree struct_expr;
3349 tree bitpos;
/* NOTE: callers are expected to have validated via get_bitfield_rep
   already; a NULL rep_decl here would crash — presumably guaranteed
   by the analysis phase.  */
3350 tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
3351 tree rep_type = TREE_TYPE (rep_decl);
3352 tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
3354 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3355 if (dump_file && (dump_flags & TDF_DETAILS))
3357 fprintf (dump_file, "Lowering:\n");
3358 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3359 fprintf (dump_file, "to:\n");
3362 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3363 defining SSA_NAME. */
3364 tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
3365 NULL_TREE);
3366 tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
3368 if (dump_file && (dump_flags & TDF_DETAILS))
3369 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3371 if (write)
/* Write: load the representative, insert the new bits, store it back.  */
3373 new_val = ifc_temp_var (rep_type,
3374 build3 (BIT_INSERT_EXPR, rep_type, new_val,
3375 unshare_expr (gimple_assign_rhs1 (stmt)),
3376 bitpos), &gsi);
3378 if (dump_file && (dump_flags & TDF_DETAILS))
3379 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3381 gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
3382 new_val);
3383 gimple_move_vops (new_stmt, stmt);
3384 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3386 if (dump_file && (dump_flags & TDF_DETAILS))
3387 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3389 else
/* Read: load the representative and extract the bitfield from it.  */
3391 tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
3392 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
3393 bitpos);
3394 new_val = ifc_temp_var (bf_type, bfr, &gsi);
3396 gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
3397 new_val);
3398 gimple_move_vops (new_stmt, stmt);
3399 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3401 if (dump_file && (dump_flags & TDF_DETAILS))
3402 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
/* The original bitfield access is fully replaced; remove it.  */
3405 gsi_remove (&gsi, true);
3408 /* Return TRUE if there are bitfields to lower in this LOOP. Fill
3409 READS_TO_LOWER and WRITES_TO_LOWER with the bitfield reads and writes
that need lowering. */
3411 static bool
3412 bitfields_to_lower_p (class loop *loop,
3413 vec <gassign *> &reads_to_lower,
3414 vec <gassign *> &writes_to_lower)
3416 gimple_stmt_iterator gsi;
3418 if (dump_file && (dump_flags & TDF_DETAILS))
3420 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3423 for (unsigned i = 0; i < loop->num_nodes; ++i)
3425 basic_block bb = ifc_bbs[i];
3426 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3428 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3429 if (!stmt)
3430 continue;
3432 tree op = gimple_assign_lhs (stmt);
3433 bool write = TREE_CODE (op) == COMPONENT_REF;
3435 if (!write)
3436 op = gimple_assign_rhs1 (stmt);
3438 if (TREE_CODE (op) != COMPONENT_REF)
3439 continue;
3441 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3444 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3446 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3448 if (dump_file && (dump_flags & TDF_DETAILS))
3449 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3450 " field type is not Integral.\n");
3451 return false;
3454 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3456 if (dump_file && (dump_flags & TDF_DETAILS))
3457 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3458 " representative is BLKmode.\n");
3459 return false;
3462 if (dump_file && (dump_flags & TDF_DETAILS))
3463 fprintf (dump_file, "\tBitfield OK to lower.\n");
3464 if (write)
3465 writes_to_lower.safe_push (stmt);
3466 else
3467 reads_to_lower.safe_push (stmt);
3471 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
3475 /* If-convert LOOP when it is legal. For the moment this pass has no
3476 profitability analysis. Returns non-zero todo flags when something
3477 changed. */
3479 unsigned int
3480 tree_if_conversion (class loop *loop, vec<gimple *> *preds)
3482 unsigned int todo = 0;
3483 bool aggressive_if_conv;
/* Non-NULL when the outer loop was versioned and the inner loop of the
   scalar copy still needs converting; we then loop back to AGAIN.  */
3484 class loop *rloop;
3485 auto_vec <gassign *, 4> reads_to_lower;
3486 auto_vec <gassign *, 4> writes_to_lower;
3487 bitmap exit_bbs;
3488 edge pe;
3489 vec<data_reference_p> refs;
/* Restart point for converting RLOOP after a versioned outer loop.
   Reset all pass-global state each time through.  */
3491 again:
3492 rloop = NULL;
3493 ifc_bbs = NULL;
3494 need_to_lower_bitfields = false;
3495 need_to_ifcvt = false;
3496 need_to_predicate = false;
3497 need_to_rewrite_undefined = false;
3498 any_complicated_phi = false;
3499 refs.create (5);
3501 /* Apply more aggressive if-conversion when loop or its outer loop were
3502 marked with simd pragma. When that's the case, we try to if-convert
3503 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
3504 aggressive_if_conv = loop->force_vectorize;
3505 if (!aggressive_if_conv)
3507 class loop *outer_loop = loop_outer (loop);
3508 if (outer_loop && outer_loop->force_vectorize)
3509 aggressive_if_conv = true;
/* Only single-exit loops are handled.  */
3512 if (!single_exit (loop))
3513 goto cleanup;
3515 /* If there are more than two BBs in the loop then there is at least one if
3516 to convert. */
3517 if (loop->num_nodes > 2
3518 && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
3519 goto cleanup;
3521 ifc_bbs = get_loop_body_in_if_conv_order (loop);
3522 if (!ifc_bbs)
3524 if (dump_file && (dump_flags & TDF_DETAILS))
3525 fprintf (dump_file, "Irreducible loop\n");
3526 goto cleanup;
3529 if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
3530 goto cleanup;
3532 if (loop->num_nodes > 2)
3534 need_to_ifcvt = true;
3536 if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
3537 goto cleanup;
/* Predication/complicated-PHI handling is only worth it when the result
   will be vectorized; bail out otherwise.  */
3539 if ((need_to_predicate || any_complicated_phi)
3540 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3541 || loop->dont_vectorize))
3542 goto cleanup;
/* Bitfield lowering is likewise done only in service of vectorization.  */
3545 if ((flag_tree_loop_vectorize || loop->force_vectorize)
3546 && !loop->dont_vectorize)
3547 need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
3548 writes_to_lower);
3550 if (!need_to_ifcvt && !need_to_lower_bitfields)
3551 goto cleanup;
3553 /* The edge to insert invariant stmts on. */
3554 pe = loop_preheader_edge (loop);
3556 /* Since we have no cost model, always version loops unless the user
3557 specified -ftree-loop-if-convert or unless versioning is required.
3558 Either version this loop, or if the pattern is right for outer-loop
3559 vectorization, version the outer loop. In the latter case we will
3560 still if-convert the original inner loop. */
3561 if (need_to_lower_bitfields
3562 || need_to_predicate
3563 || any_complicated_phi
3564 || flag_tree_loop_if_convert != 1)
3566 class loop *vloop
3567 = (versionable_outer_loop_p (loop_outer (loop))
3568 ? loop_outer (loop) : loop);
3569 class loop *nloop = version_loop_for_if_conversion (vloop, preds);
3570 if (nloop == NULL)
3571 goto cleanup;
3572 if (vloop != loop)
3574 /* If versionable_outer_loop_p decided to version the
3575 outer loop, version also the inner loop of the non-vectorized
3576 loop copy. So we transform:
3577 loop1
3578 loop2
3579 into:
3580 if (LOOP_VECTORIZED (1, 3))
3582 loop1
3583 loop2
3585 else
3586 loop3 (copy of loop1)
3587 if (LOOP_VECTORIZED (4, 5))
3588 loop4 (copy of loop2)
3589 else
3590 loop5 (copy of loop4) */
3591 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3592 rloop = nloop->inner;
3594 else
3595 /* If we versioned loop then make sure to insert invariant
3596 stmts before the .LOOP_VECTORIZED check since the vectorizer
3597 will re-use that for things like runtime alias versioning
3598 whose condition can end up using those invariants. */
3599 pe = single_pred_edge (gimple_bb (preds->last ()));
/* Lower the recorded bitfield reads and writes before combining blocks.  */
3602 if (need_to_lower_bitfields)
3604 if (dump_file && (dump_flags & TDF_DETAILS))
3606 fprintf (dump_file, "-------------------------\n");
3607 fprintf (dump_file, "Start lowering bitfields\n");
3609 while (!reads_to_lower.is_empty ())
3610 lower_bitfield (reads_to_lower.pop (), false);
3611 while (!writes_to_lower.is_empty ())
3612 lower_bitfield (writes_to_lower.pop (), true);
3614 if (dump_file && (dump_flags & TDF_DETAILS))
3616 fprintf (dump_file, "Done lowering bitfields\n");
3617 fprintf (dump_file, "-------------------------\n");
3620 if (need_to_ifcvt)
3622 /* Now all statements are if-convertible. Combine all the basic
3623 blocks into one huge basic block doing the if-conversion
3624 on-the-fly. */
3625 combine_blocks (loop);
3628 /* Perform local CSE, this esp. helps the vectorizer analysis if loads
3629 and stores are involved. CSE only the loop body, not the entry
3630 PHIs, those are to be kept in sync with the non-if-converted copy.
3631 ??? We'll still keep dead stores though. */
3632 exit_bbs = BITMAP_ALLOC (NULL);
3633 bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
3634 bitmap_set_bit (exit_bbs, loop->latch->index);
/* Replace SSA names recorded as redundant during conversion before
   running value numbering.  */
3636 std::pair <tree, tree> *name_pair;
3637 unsigned ssa_names_idx;
3638 FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
3639 replace_uses_by (name_pair->first, name_pair->second);
3640 redundant_ssa_names.release ();
3642 todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
3644 /* Delete dead predicate computations. */
3645 ifcvt_local_dce (loop);
3646 BITMAP_FREE (exit_bbs);
/* Hoist loop-invariant statements to the preheader edge PE chosen
   above (possibly before the .LOOP_VECTORIZED check).  */
3648 ifcvt_hoist_invariants (loop, pe);
3650 todo |= TODO_cleanup_cfg;
/* Common exit: free data-reference aux data and the per-BB predicates,
   whether we succeeded or bailed out early.  */
3652 cleanup:
3653 data_reference_p dr;
3654 unsigned int i;
3655 for (i = 0; refs.iterate (i, &dr); i++)
3656 free (dr->aux);
3658 refs.truncate (0);
3660 if (ifc_bbs)
3662 unsigned int i;
3664 for (i = 0; i < loop->num_nodes; i++)
3665 free_bb_predicate (ifc_bbs[i]);
3667 free (ifc_bbs);
3668 ifc_bbs = NULL;
/* If outer-loop versioning left an inner loop to convert, do it now.  */
3670 if (rloop != NULL)
3672 loop = rloop;
3673 reads_to_lower.truncate (0);
3674 writes_to_lower.truncate (0);
3675 goto again;
3678 return todo;
3681 /* Tree if-conversion pass management. */
3683 namespace {
/* Static descriptor for the "ifcvt" GIMPLE pass; consumed by the pass
   manager when the pass object is constructed.  */
3685 const pass_data pass_data_if_conversion =
3687 GIMPLE_PASS, /* type */
3688 "ifcvt", /* name */
3689 OPTGROUP_NONE, /* optinfo_flags */
3690 TV_TREE_LOOP_IFCVT, /* tv_id */
3691 ( PROP_cfg | PROP_ssa ), /* properties_required */
3692 0, /* properties_provided */
3693 0, /* properties_destroyed */
3694 0, /* todo_flags_start */
3695 0, /* todo_flags_finish */
/* Pass wrapper: decides via gate() whether if-conversion should run and
   drives tree_if_conversion over all loops in execute().  */
3698 class pass_if_conversion : public gimple_opt_pass
3700 public:
3701 pass_if_conversion (gcc::context *ctxt)
3702 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3705 /* opt_pass methods: */
3706 bool gate (function *) final override;
3707 unsigned int execute (function *) final override;
3709 }; // class pass_if_conversion
3711 bool
3712 pass_if_conversion::gate (function *fun)
3714 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3715 && flag_tree_loop_if_convert != 0)
3716 || flag_tree_loop_if_convert == 1);
3719 unsigned int
3720 pass_if_conversion::execute (function *fun)
3722 unsigned todo = 0;
/* Nothing to do if FUN has no loops beyond the implicit outermost one.  */
3724 if (number_of_loops (fun) <= 1)
3725 return 0;
/* PREDS collects the .LOOP_VECTORIZED internal calls created by loop
   versioning; they are resolved below if a loop vanished.  */
3727 auto_vec<gimple *> preds;
3728 for (auto loop : loops_list (cfun, 0))
3729 if (flag_tree_loop_if_convert == 1
3730 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3731 && !loop->dont_vectorize))
3732 todo |= tree_if_conversion (loop, &preds);
/* Conversion invalidates niter estimates and SCEV caches.  */
3734 if (todo)
3736 free_numbers_of_iterations_estimates (fun);
3737 scev_reset ();
/* Checking build: verify no stale bb->aux pointers were left behind.  */
3740 if (flag_checking)
3742 basic_block bb;
3743 FOR_EACH_BB_FN (bb, fun)
3744 gcc_assert (!bb->aux);
3747 /* Perform IL update now, it might elide some loops. */
3748 if (todo & TODO_cleanup_cfg)
3750 cleanup_tree_cfg ();
3751 if (need_ssa_update_p (fun))
3752 todo |= TODO_update_ssa;
3754 if (todo & TODO_update_ssa_any)
3755 update_ssa (todo & TODO_update_ssa_any);
3757 /* If if-conversion elided the loop fall back to the original one. */
3758 for (unsigned i = 0; i < preds.length (); ++i)
3760 gimple *g = preds[i];
3761 if (!gimple_bb (g))
3762 continue;
/* Arguments 0/1 of .LOOP_VECTORIZED are the if-converted and the
   original loop numbers; if either loop no longer exists, fold the
   call to false so only the surviving copy is kept.  */
3763 unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
3764 unsigned orig_loop = tree_to_uhwi (gimple_call_arg (g, 1));
3765 if (!get_loop (fun, ifcvt_loop) || !get_loop (fun, orig_loop))
3767 if (dump_file)
3768 fprintf (dump_file, "If-converted loop vanished\n");
3769 fold_loop_internal_call (g, boolean_false_node);
/* TODO flags were already acted upon above.  */
3773 return 0;
3776 } // anon namespace
/* Factory entry point used by the pass manager to instantiate the
   if-conversion pass.  */
3778 gimple_opt_pass *
3779 make_pass_if_conversion (gcc::context *ctxt)
3781 return new pass_if_conversion (ctxt);