MATCH: Improve `A CMP 0 ? A : -A` set of patterns to use bitwise_equal_p.
[official-gcc.git] / gcc / tree-if-conv.cc
bloba8c915913aed267edfb3ebd2c530aeca7cf51832
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #define INCLUDE_ALGORITHM
84 #include "config.h"
85 #include "system.h"
86 #include "coretypes.h"
87 #include "backend.h"
88 #include "rtl.h"
89 #include "tree.h"
90 #include "gimple.h"
91 #include "cfghooks.h"
92 #include "tree-pass.h"
93 #include "ssa.h"
94 #include "expmed.h"
95 #include "expr.h"
96 #include "optabs-tree.h"
97 #include "gimple-pretty-print.h"
98 #include "alias.h"
99 #include "fold-const.h"
100 #include "stor-layout.h"
101 #include "gimple-iterator.h"
102 #include "gimple-fold.h"
103 #include "gimplify.h"
104 #include "gimplify-me.h"
105 #include "tree-cfg.h"
106 #include "tree-into-ssa.h"
107 #include "tree-ssa.h"
108 #include "cfgloop.h"
109 #include "tree-data-ref.h"
110 #include "tree-scalar-evolution.h"
111 #include "tree-ssa-loop.h"
112 #include "tree-ssa-loop-niter.h"
113 #include "tree-ssa-loop-ivopts.h"
114 #include "tree-ssa-address.h"
115 #include "dbgcnt.h"
116 #include "tree-hash-traits.h"
117 #include "varasm.h"
118 #include "builtins.h"
119 #include "cfganal.h"
120 #include "internal-fn.h"
121 #include "fold-const.h"
122 #include "tree-ssa-sccvn.h"
123 #include "tree-cfgcleanup.h"
124 #include "tree-ssa-dse.h"
125 #include "tree-vectorizer.h"
126 #include "tree-eh.h"
127 #include "cgraph.h"
129 /* For lang_hooks.types.type_for_mode. */
130 #include "langhooks.h"
132 /* Only handle PHIs with no more arguments unless we are asked to by
133 simd pragma. */
134 #define MAX_PHI_ARG_NUM \
135 ((unsigned) param_max_tree_if_conversion_phi_args)
137 /* True if we've converted a statement that was only executed when some
138 condition C was true, and if for correctness we need to predicate the
139 statement to ensure that it is a no-op when C is false. See
140 predicate_statements for the kinds of predication we support. */
141 static bool need_to_predicate;
143 /* True if we have to rewrite stmts that may invoke undefined behavior
144 when a condition C was false so it doesn't if it is always executed.
145 See predicate_statements for the kinds of predication we support. */
146 static bool need_to_rewrite_undefined;
148 /* Indicate if there are any complicated PHIs that need to be handled in
149 if-conversion. Complicated PHI has more than two arguments and can't
150 be degenerated to two arguments PHI. See more information in comment
151 before phi_convertible_by_degenerating_args. */
152 static bool any_complicated_phi;
154 /* True if we have bitfield accesses we can lower. */
155 static bool need_to_lower_bitfields;
157 /* True if there is any ifcvting to be done. */
158 static bool need_to_ifcvt;
160 /* Hash for struct innermost_loop_behavior. It depends on the user to
161 free the memory. */
struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  /* Hash the four innermost-behavior fields (base address, offset,
     init, step).  */
  static inline hashval_t hash (const value_type &);
  /* Compare two innermost-behavior records field by field.  */
  static inline bool equal (const value_type &,
			    const compare_type &);
};
170 inline hashval_t
171 innermost_loop_behavior_hash::hash (const value_type &e)
173 hashval_t hash;
175 hash = iterative_hash_expr (e->base_address, 0);
176 hash = iterative_hash_expr (e->offset, hash);
177 hash = iterative_hash_expr (e->init, hash);
178 return iterative_hash_expr (e->step, hash);
181 inline bool
182 innermost_loop_behavior_hash::equal (const value_type &e1,
183 const compare_type &e2)
185 if ((e1->base_address && !e2->base_address)
186 || (!e1->base_address && e2->base_address)
187 || (!e1->offset && e2->offset)
188 || (e1->offset && !e2->offset)
189 || (!e1->init && e2->init)
190 || (e1->init && !e2->init)
191 || (!e1->step && e2->step)
192 || (e1->step && !e2->step))
193 return false;
195 if (e1->base_address && e2->base_address
196 && !operand_equal_p (e1->base_address, e2->base_address, 0))
197 return false;
198 if (e1->offset && e2->offset
199 && !operand_equal_p (e1->offset, e2->offset, 0))
200 return false;
201 if (e1->init && e2->init
202 && !operand_equal_p (e1->init, e2->init, 0))
203 return false;
204 if (e1->step && e2->step
205 && !operand_equal_p (e1->step, e2->step, 0))
206 return false;
208 return true;
211 /* List of basic blocks in if-conversion-suitable order. */
212 static basic_block *ifc_bbs;
214 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
215 static hash_map<innermost_loop_behavior_hash,
216 data_reference_p> *innermost_DR_map;
218 /* Hash table to store <base reference, DR> pairs. */
219 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
221 /* List of redundant SSA names: the first should be replaced by the second. */
222 static vec< std::pair<tree, tree> > redundant_ssa_names;
224 /* Structure used to predicate basic blocks. This is attached to the
225 ->aux field of the BBs in the loop to be if-converted. */
226 struct bb_predicate {
228 /* The condition under which this basic block is executed. */
229 tree predicate;
231 /* PREDICATE is gimplified, and the sequence of statements is
232 recorded here, in order to avoid the duplication of computations
233 that occur in previous conditions. See PR44483. */
234 gimple_seq predicate_gimplified_stmts;
236 /* Records the number of statements recorded into
237 PREDICATE_GIMPLIFIED_STMTS. */
238 unsigned no_predicate_stmts;
241 /* Returns true when the basic block BB has a predicate. */
243 static inline bool
244 bb_has_predicate (basic_block bb)
246 return bb->aux != NULL;
249 /* Returns the gimplified predicate for basic block BB. */
251 static inline tree
252 bb_predicate (basic_block bb)
254 return ((struct bb_predicate *) bb->aux)->predicate;
257 /* Sets the gimplified predicate COND for basic block BB. */
static inline void
set_bb_predicate (basic_block bb, tree cond)
{
  auto aux = (struct bb_predicate *) bb->aux;
  /* The predicate must be a gimple value, optionally wrapped in a
     single TRUTH_NOT_EXPR.  */
  gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
	       && is_gimple_val (TREE_OPERAND (cond, 0)))
	      || is_gimple_val (cond));
  aux->predicate = cond;
  /* Count each time a predicate is recorded for this block —
     presumably used to bound predicate complexity; confirm against
     callers of get_bb_num_predicate_stmts.  */
  aux->no_predicate_stmts++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Recording block %d value %d\n", bb->index,
	     aux->no_predicate_stmts);
}
274 /* Returns the sequence of statements of the gimplification of the
275 predicate for basic block BB. */
277 static inline gimple_seq
278 bb_predicate_gimplified_stmts (basic_block bb)
280 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
283 /* Sets the sequence of statements STMTS of the gimplification of the
284 predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
285 counts. */
287 static inline void
288 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
289 bool preserve_counts)
291 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
292 if (stmts == NULL && !preserve_counts)
293 ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
296 /* Adds the sequence of statements STMTS to the sequence of statements
297 of the predicate for basic block BB. */
static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ??? This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (stmts);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      delink_stmt_imm_use (stmt);
      gimple_set_modified (stmt, true);
      /* Keep the per-block count of predicate stmts in sync.  */
      ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
    }
  /* Append without updating stmt operands; they are not in a BB yet.  */
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}
320 /* Return the number of statements the predicate of the basic block consists
321 of. */
323 static inline unsigned
324 get_bb_num_predicate_stmts (basic_block bb)
326 return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
329 /* Initializes to TRUE the predicate of basic block BB. */
331 static inline void
332 init_bb_predicate (basic_block bb)
334 bb->aux = XNEW (struct bb_predicate);
335 set_bb_predicate_gimplified_stmts (bb, NULL, false);
336 set_bb_predicate (bb, boolean_true_node);
339 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
static inline void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.  */
      if (flag_checking)
	for (gimple_stmt_iterator i = gsi_start (stmts);
	     !gsi_end_p (i); gsi_next (&i))
	  gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL, false);
    }
}
359 /* Free the predicate of basic block BB. */
361 static inline void
362 free_bb_predicate (basic_block bb)
364 if (!bb_has_predicate (bb))
365 return;
367 release_bb_predicate (bb);
368 free (bb->aux);
369 bb->aux = NULL;
372 /* Reinitialize predicate of BB with the true predicate. */
374 static inline void
375 reset_bb_predicate (basic_block bb)
377 if (!bb_has_predicate (bb))
378 init_bb_predicate (bb);
379 else
381 release_bb_predicate (bb);
382 set_bb_predicate (bb, boolean_true_node);
386 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
387 the expression EXPR. Inserts the statement created for this
388 computation before GSI and leaves the iterator GSI at the same
389 statement. */
static tree
ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
{
  tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
  gimple *stmt = gimple_build_assign (new_name, expr);
  /* Copy the virtual use from the stmt at GSI so the new assignment is
     valid when EXPR contains a memory reference.  */
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return new_name;
}
401 /* Return true when COND is a false predicate. */
403 static inline bool
404 is_false_predicate (tree cond)
406 return (cond != NULL_TREE
407 && (cond == boolean_false_node
408 || integer_zerop (cond)));
411 /* Return true when COND is a true predicate. */
413 static inline bool
414 is_true_predicate (tree cond)
416 return (cond == NULL_TREE
417 || cond == boolean_true_node
418 || integer_onep (cond));
421 /* Returns true when BB has a predicate that is not trivial: true or
422 NULL_TREE. */
424 static inline bool
425 is_predicated (basic_block bb)
427 return !is_true_predicate (bb_predicate (bb));
430 /* Parses the predicate COND and returns its comparison code and
431 operands OP0 and OP1. */
static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  /* An SSA name: inspect its defining statement.  */
  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (s);
	  *op1 = gimple_assign_rhs2 (s);
	  return gimple_assign_rhs_code (s);
	}

      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
	{
	  /* Recurse on the negated operand and invert the comparison
	     code, taking NaNs into account for float comparisons.  */
	  tree op = gimple_assign_rhs1 (s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  /* A bare comparison expression.  */
  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  /* COND cannot be reduced to a comparison.  */
  return ERROR_MARK;
}
471 /* Returns the fold of predicate C1 OR C2 at location LOC. */
473 static tree
474 fold_or_predicates (location_t loc, tree c1, tree c2)
476 tree op1a, op1b, op2a, op2b;
477 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
478 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
480 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
482 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
483 code2, op2a, op2b);
484 if (t)
485 return t;
488 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
491 /* Returns either a COND_EXPR or the folded expression if the folded
492 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
493 a constant or a SSA_NAME. */
static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }

  /* Try match-and-simplify on the COND_EXPR, following all SSA edges,
     so that e.g. MIN/MAX/ABS patterns get recognized.  */
  gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
			 type, cond, rhs, lhs);
  if (cexpr.resimplify (NULL, follow_all_ssa_edges))
    {
      if (gimple_simplified_result_is_gimple_val (&cexpr))
	return cexpr.ops[0];
      else if (cexpr.code == ABS_EXPR)
	return build1 (ABS_EXPR, type, cexpr.ops[0]);
      else if (cexpr.code == MIN_EXPR
	       || cexpr.code == MAX_EXPR)
	return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
    }

  /* No simplification applied: build the COND_EXPR as-is.  */
  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
526 /* Add condition NC to the predicate list of basic block BB. LOOP is
527 the loop to be if-converted. Use predicate of cd-equivalent block
528 for join bb if it exists: we call basic blocks bb1 and bb2
529 cd-equivalent if they are executed under the same condition. */
static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  /* A true condition carries no information.  */
  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  /* OR the new condition NC into any predicate already recorded.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      /* The disjunction folded to true: BB is always executed.  */
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  /* Gimplify the predicate if needed; the generated stmts are recorded
     on BB to be inserted later.  */
  if (!is_gimple_val (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}
592 /* Add the condition COND to the previous condition PREV_COND, and add
593 this to the predicate list of the destination of edge E. LOOP is
594 the loop to be if-converted. */
static void
add_to_dst_predicate_list (class loop *loop, edge e,
			   tree prev_cond, tree cond)
{
  /* Nothing to record for edges leaving the loop.  */
  if (!flow_bb_inside_loop_p (loop, e->dest))
    return;

  /* Conjoin the edge condition with the predicate under which the
     source block executes.  */
  if (!is_true_predicate (prev_cond))
    cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			prev_cond, cond);

  /* Blocks dominating the latch are always executed and need no
     predicate.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
    add_to_predicate_list (loop, e->dest, cond);
}
611 /* Return true if one of the successor edges of BB exits LOOP. */
613 static bool
614 bb_with_exit_edge_p (class loop *loop, basic_block bb)
616 edge e;
617 edge_iterator ei;
619 FOR_EACH_EDGE (e, ei, bb->succs)
620 if (loop_exit_edge_p (loop, e))
621 return true;
623 return false;
626 /* Given PHI which has more than two arguments, this function checks if
627 it's if-convertible by degenerating its arguments. Specifically, if
628 below two conditions are satisfied:
630 1) Number of PHI arguments with different values equals to 2 and one
631 argument has the only occurrence.
632 2) The edge corresponding to the unique argument isn't critical edge.
634 Such PHI can be handled as PHIs have only two arguments. For example,
635 below PHI:
637 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
639 can be transformed into:
641 res = (predicate of e3) ? A_2 : A_1;
643 Return TRUE if it is the case, FALSE otherwise. */
static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  /* Bucket the arguments into at most two distinct values, counting
     occurrences and remembering the last index of each value.  */
  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	/* More than two distinct argument values.  */
	return false;
    }

  /* One of the two values must occur exactly once.  */
  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}
685 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
686 and it belongs to basic block BB. Note at this point, it is sure
687 that PHI is if-convertible. This function updates global variable
688 ANY_COMPLICATED_PHI if PHI is complicated. */
690 static bool
691 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
693 if (dump_file && (dump_flags & TDF_DETAILS))
695 fprintf (dump_file, "-------------------------\n");
696 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
699 if (bb != loop->header
700 && gimple_phi_num_args (phi) > 2
701 && !phi_convertible_by_degenerating_args (phi))
702 any_complicated_phi = true;
704 return true;
707 /* Records the status of a data reference. This struct is attached to
708 each DR->aux field. */
struct ifc_dr {
  /* True when the reference is read or written on every iteration,
     i.e. its accumulated rw_predicate ORs up to true.  */
  bool rw_unconditionally;
  /* True when the reference is written on every iteration.  */
  bool w_unconditionally;
  /* True when the base object is written at least once
     unconditionally.  */
  bool written_at_least_once;

  /* Accumulated OR of predicates under which the ref is accessed.  */
  tree rw_predicate;
  /* Accumulated OR of predicates under which the ref is written.  */
  tree w_predicate;
  /* Accumulated OR of predicates under which the base is written.  */
  tree base_w_predicate;
};
720 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
721 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
722 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
723 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
725 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
726 HASH tables. While storing them in HASH table, it checks if the
727 reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least once
729 unconditionally and stores it as flag information along with DR.
730 In other words for every data reference A in STMT there exist other
731 accesses to a data reference with the same base with predicates that
732 add up (OR-up) to the true predicate: this ensures that the data
733 reference A is touched (read or written) on every iteration of the
734 if-converted loop. */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{
  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* The first DR seen for a given innermost behavior becomes the
     "master" DR that accumulates the predicates.  */
  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  /* Reads and writes both count towards the read-or-write predicate.  */
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  if (DR_IS_WRITE (a))
    {
      /* Likewise accumulate write predicates per base object.  */
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
776 /* Return TRUE if can prove the index IDX of an array reference REF is
777 within array bound. Return false otherwise. */
static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays that might have flexible sizes, it is not guaranteed that they
     do not extend over their declared size.  */
  if (array_ref_flexible_size_p (ref))
    return false;

  /* The index must evolve as an affine function of the loop: constant
     initial value and constant step.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  /* Compute how many iterations keep the index within [low, high],
     depending on the direction of the step.  */
  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
846 /* Return TRUE if ref is a within bound array reference. */
848 static bool
849 ref_within_array_bound (gimple *stmt, tree ref)
851 class loop *loop = loop_containing_stmt (stmt);
853 gcc_assert (loop != NULL);
854 return for_each_index (&ref, idx_within_array_bound, loop);
858 /* Given a memory reference expression T, return TRUE if base object
859 it refers to is writable. The base object of a memory reference
860 is the main object being referenced, which is returned by function
861 get_base_address. */
863 static bool
864 base_object_writable (tree ref)
866 tree base_tree = get_base_address (ref);
868 return (base_tree
869 && DECL_P (base_tree)
870 && decl_binds_to_current_def_p (base_tree)
871 && !TREE_READONLY (base_tree));
874 /* Return true when the memory references of STMT won't trap in the
875 if-converted code. There are two things that we have to check for:
877 - writes to memory occur to writable memory: if-conversion of
878 memory writes transforms the conditional memory writes into
879 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
880 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
881 be executed at all in the original code, it may be a readonly
882 memory. To check that A is not const-qualified, we check that
883 there exists at least an unconditional write to A in the current
884 function.
886 - reads or writes to memory are valid memory accesses for every
887 iteration. To check that the memory accesses are correctly formed
888 and that we are allowed to read and write in these locations, we
889 check that the memory accesses to be if-converted occur at every
890 iteration unconditionally.
892 Returns true for the memory reference in STMT, same memory reference
893 is read or written unconditionally atleast once and the base memory
894 reference is written unconditionally once. This is to check reference
   will not write fault.  Also returns true if the memory reference is
896 unconditionally read once then we are conditionally writing to memory
897 which is defined as read and write and is bound to the definition
898 we are seeing. */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
	      || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditionally write won't trap if the base is written
	 to unconditionally.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	/* Turning a conditional store into an unconditional one may
	   introduce a store data race; only allowed when the user
	   permits it.  */
	return flag_store_data_races;
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return flag_store_data_races;
    }

  return false;
}
951 /* Return true if STMT could be converted into a masked load or store
952 (conditional load or store based on a mask computed from bb predicate). */
static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  /* Check whether this is a load or store.  */
  tree lhs = gimple_assign_lhs (stmt);
  bool is_load;
  tree ref;
  if (gimple_store_p (stmt))
    {
      /* Only stores of plain gimple values can become masked stores.  */
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  /* References whose address cannot be taken cannot be turned into
     internal-function loads/stores.  */
  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  /* Finally ask the target whether it supports the masked access.  */
  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
991 /* Return true if STMT could be converted from an operation that is
992 unconditional to one that is conditional on a bb predicate mask. */
static bool
ifcvt_can_predicate (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);

  /* Predication is only done in loops we intend to vectorize, and
     never on volatile accesses.  */
  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || gimple_has_volatile_ops (stmt))
    return false;

  /* Plain loads/stores can use target masked load/store support.  */
  if (gimple_assign_single_p (stmt))
    return ifcvt_can_use_mask_load_store (stmt);

  /* Other assignments need a conditional internal function that the
     target can vectorize for the lhs type.  */
  tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  if (!types_compatible_p (lhs_type, rhs_type))
    return false;
  internal_fn cond_fn = get_conditional_internal_fn (code);
  return (cond_fn != IFN_LAST
	  && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
}
1017 /* Return true when STMT is if-convertible.
1019 GIMPLE_ASSIGN statement is not if-convertible if,
1020 - it is not movable,
1021 - it could trap,
1022 - LHS is not var decl. */
static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constrains might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      /* A possibly-trapping stmt can still be if-converted if the
	 target can predicate it (masked load/store or conditional
	 internal function); flag it for predicate_statements.  */
      if (ifcvt_can_predicate (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }
  else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	    || POINTER_TYPE_P (TREE_TYPE (lhs)))
	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
	   && arith_code_with_undefined_signed_overflow
		(gimple_assign_rhs_code (stmt)))
    /* We have to rewrite stmts with undefined overflow.  */
    need_to_rewrite_undefined = true;

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}
1087 /* Return true when STMT is if-convertible.
1089 A statement is if-convertible if:
1090 - it is an if-convertible GIMPLE_ASSIGN,
1091 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1092 - it is builtins call,
1093 - it is a call to a function with a SIMD clone. */
1095 static bool
1096 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1098 switch (gimple_code (stmt))
1100 case GIMPLE_LABEL:
1101 case GIMPLE_DEBUG:
1102 case GIMPLE_COND:
1103 return true;
1105 case GIMPLE_ASSIGN:
1106 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1108 case GIMPLE_CALL:
1110 tree fndecl = gimple_call_fndecl (stmt);
1111 if (fndecl)
1113 /* We can vectorize some builtins and functions with SIMD
1114 "inbranch" clones. */
1115 int flags = gimple_call_flags (stmt);
1116 struct cgraph_node *node = cgraph_node::get (fndecl);
1117 if ((flags & ECF_CONST)
1118 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1119 && fndecl_built_in_p (fndecl))
1120 return true;
1121 if (node && node->simd_clones != NULL)
1122 /* Ensure that at least one clone can be "inbranch". */
1123 for (struct cgraph_node *n = node->simd_clones; n != NULL;
1124 n = n->simdclone->next_clone)
1125 if (n->simdclone->inbranch)
1127 gimple_set_plf (stmt, GF_PLF_2, true);
1128 need_to_predicate = true;
1129 return true;
1132 return false;
1135 default:
1136 /* Don't know what to do with 'em so don't do anything. */
1137 if (dump_file && (dump_flags & TDF_DETAILS))
1139 fprintf (dump_file, "don't know what to do\n");
1140 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1142 return false;
1146 /* Assumes that BB has more than 1 predecessors.
1147 Returns false if at least one successor is not on critical edge
1148 and true otherwise. */
1150 static inline bool
1151 all_preds_critical_p (basic_block bb)
1153 edge e;
1154 edge_iterator ei;
1156 FOR_EACH_EDGE (e, ei, bb->preds)
1157 if (EDGE_COUNT (e->src->succs) == 1)
1158 return false;
1159 return true;
1162 /* Return true when BB is if-convertible. This routine does not check
1163 basic block's statements and phis.
1165 A basic block is not if-convertible if:
1166 - it is non-empty and it is after the exit block (in BFS order),
1167 - it is after the exit block but before the latch,
1168 - its edges are not normal.
1170 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1171 inside LOOP. */
1173 static bool
1174 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1176 edge e;
1177 edge_iterator ei;
1179 if (dump_file && (dump_flags & TDF_DETAILS))
1180 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1182 if (EDGE_COUNT (bb->succs) > 2)
1183 return false;
1185 if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
1186 if (gimple_call_ctrl_altering_p (call))
1187 return false;
1189 if (exit_bb)
1191 if (bb != loop->latch)
1193 if (dump_file && (dump_flags & TDF_DETAILS))
1194 fprintf (dump_file, "basic block after exit bb but before latch\n");
1195 return false;
1197 else if (!empty_block_p (bb))
1199 if (dump_file && (dump_flags & TDF_DETAILS))
1200 fprintf (dump_file, "non empty basic block after exit bb\n");
1201 return false;
1203 else if (bb == loop->latch
1204 && bb != exit_bb
1205 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1207 if (dump_file && (dump_flags & TDF_DETAILS))
1208 fprintf (dump_file, "latch is not dominated by exit_block\n");
1209 return false;
1213 /* Be less adventurous and handle only normal edges. */
1214 FOR_EACH_EDGE (e, ei, bb->succs)
1215 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1217 if (dump_file && (dump_flags & TDF_DETAILS))
1218 fprintf (dump_file, "Difficult to handle edges\n");
1219 return false;
1222 return true;
1225 /* Return true when all predecessor blocks of BB are visited. The
1226 VISITED bitmap keeps track of the visited blocks. */
1228 static bool
1229 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1231 edge e;
1232 edge_iterator ei;
1233 FOR_EACH_EDGE (e, ei, bb->preds)
1234 if (!bitmap_bit_p (*visited, e->src->index))
1235 return false;
1237 return true;
1240 /* Get body of a LOOP in suitable order for if-conversion. It is
1241 caller's responsibility to deallocate basic block list.
1242 If-conversion suitable order is, breadth first sort (BFS) order
1243 with an additional constraint: select a block only if all its
1244 predecessors are already selected. */
1246 static basic_block *
1247 get_loop_body_in_if_conv_order (const class loop *loop)
1249 basic_block *blocks, *blocks_in_bfs_order;
1250 basic_block bb;
1251 bitmap visited;
1252 unsigned int index = 0;
1253 unsigned int visited_count = 0;
1255 gcc_assert (loop->num_nodes);
1256 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1258 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1259 visited = BITMAP_ALLOC (NULL);
1261 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1263 index = 0;
1264 while (index < loop->num_nodes)
1266 bb = blocks_in_bfs_order [index];
1268 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1270 free (blocks_in_bfs_order);
1271 BITMAP_FREE (visited);
1272 free (blocks);
1273 return NULL;
1276 if (!bitmap_bit_p (visited, bb->index))
1278 if (pred_blocks_visited_p (bb, &visited)
1279 || bb == loop->header)
1281 /* This block is now visited. */
1282 bitmap_set_bit (visited, bb->index);
1283 blocks[visited_count++] = bb;
1287 index++;
1289 if (index == loop->num_nodes
1290 && visited_count != loop->num_nodes)
1291 /* Not done yet. */
1292 index = 0;
1294 free (blocks_in_bfs_order);
1295 BITMAP_FREE (visited);
1296 return blocks;
1299 /* Returns true when the analysis of the predicates for all the basic
1300 blocks in LOOP succeeded.
1302 predicate_bbs first allocates the predicates of the basic blocks.
1303 These fields are then initialized with the tree expressions
1304 representing the predicates under which a basic block is executed
1305 in the LOOP. As the loop->header is executed at each iteration, it
1306 has the "true" predicate. Other statements executed under a
1307 condition are predicated with that condition, for example
1309 | if (x)
1310 | S1;
1311 | else
1312 | S2;
1314 S1 will be predicated with "x", and
1315 S2 will be predicated with "!x". */
1317 static void
1318 predicate_bbs (loop_p loop)
1320 unsigned int i;
1322 for (i = 0; i < loop->num_nodes; i++)
1323 init_bb_predicate (ifc_bbs[i]);
1325 for (i = 0; i < loop->num_nodes; i++)
1327 basic_block bb = ifc_bbs[i];
1328 tree cond;
1330 /* The loop latch and loop exit block are always executed and
1331 have no extra conditions to be processed: skip them. */
1332 if (bb == loop->latch
1333 || bb_with_exit_edge_p (loop, bb))
1335 reset_bb_predicate (bb);
1336 continue;
1339 cond = bb_predicate (bb);
1340 if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
1342 tree c2;
1343 edge true_edge, false_edge;
1344 location_t loc = gimple_location (stmt);
1345 tree c;
1346 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1347 conditions can remain unfolded because of multiple uses so
1348 try to re-fold here, especially to get precision changing
1349 conversions sorted out. Do not simply fold the stmt since
1350 this is analysis only. When conditions were embedded in
1351 COND_EXPRs those were folded separately before folding the
1352 COND_EXPR but as they are now outside we have to make sure
1353 to fold them. Do it here - another opportunity would be to
1354 fold predicates as they are inserted. */
1355 gimple_match_op cexpr (gimple_match_cond::UNCOND,
1356 gimple_cond_code (stmt),
1357 boolean_type_node,
1358 gimple_cond_lhs (stmt),
1359 gimple_cond_rhs (stmt));
1360 if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1361 && cexpr.code.is_tree_code ()
1362 && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1363 c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1364 cexpr.ops[0], cexpr.ops[1]);
1365 else
1366 c = build2_loc (loc, gimple_cond_code (stmt),
1367 boolean_type_node,
1368 gimple_cond_lhs (stmt),
1369 gimple_cond_rhs (stmt));
1371 /* Add new condition into destination's predicate list. */
1372 extract_true_false_edges_from_block (gimple_bb (stmt),
1373 &true_edge, &false_edge);
1375 /* If C is true, then TRUE_EDGE is taken. */
1376 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1377 unshare_expr (c));
1379 /* If C is false, then FALSE_EDGE is taken. */
1380 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1381 unshare_expr (c));
1382 add_to_dst_predicate_list (loop, false_edge,
1383 unshare_expr (cond), c2);
1385 cond = NULL_TREE;
1388 /* If current bb has only one successor, then consider it as an
1389 unconditional goto. */
1390 if (single_succ_p (bb))
1392 basic_block bb_n = single_succ (bb);
1394 /* The successor bb inherits the predicate of its
1395 predecessor. If there is no predicate in the predecessor
1396 bb, then consider the successor bb as always executed. */
1397 if (cond == NULL_TREE)
1398 cond = boolean_true_node;
1400 add_to_predicate_list (loop, bb_n, cond);
1404 /* The loop header is always executed. */
1405 reset_bb_predicate (loop->header);
1406 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1407 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1410 /* Build region by adding loop pre-header and post-header blocks. */
1412 static vec<basic_block>
1413 build_region (class loop *loop)
1415 vec<basic_block> region = vNULL;
1416 basic_block exit_bb = NULL;
1418 gcc_assert (ifc_bbs);
1419 /* The first element is loop pre-header. */
1420 region.safe_push (loop_preheader_edge (loop)->src);
1422 for (unsigned int i = 0; i < loop->num_nodes; i++)
1424 basic_block bb = ifc_bbs[i];
1425 region.safe_push (bb);
1426 /* Find loop postheader. */
1427 edge e;
1428 edge_iterator ei;
1429 FOR_EACH_EDGE (e, ei, bb->succs)
1430 if (loop_exit_edge_p (loop, e))
1432 exit_bb = e->dest;
1433 break;
1436 /* The last element is loop post-header. */
1437 gcc_assert (exit_bb);
1438 region.safe_push (exit_bb);
1439 return region;
1442 /* Return true when LOOP is if-convertible. This is a helper function
1443 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1444 in if_convertible_loop_p. */
1446 static bool
1447 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1449 unsigned int i;
1450 basic_block exit_bb = NULL;
1451 vec<basic_block> region;
1453 calculate_dominance_info (CDI_DOMINATORS);
1455 for (i = 0; i < loop->num_nodes; i++)
1457 basic_block bb = ifc_bbs[i];
1459 if (!if_convertible_bb_p (loop, bb, exit_bb))
1460 return false;
1462 if (bb_with_exit_edge_p (loop, bb))
1463 exit_bb = bb;
1466 for (i = 0; i < loop->num_nodes; i++)
1468 basic_block bb = ifc_bbs[i];
1469 gimple_stmt_iterator gsi;
1471 bool may_have_nonlocal_labels
1472 = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1473 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1474 switch (gimple_code (gsi_stmt (gsi)))
1476 case GIMPLE_LABEL:
1477 if (!may_have_nonlocal_labels)
1479 tree label
1480 = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1481 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1482 return false;
1484 /* Fallthru. */
1485 case GIMPLE_ASSIGN:
1486 case GIMPLE_CALL:
1487 case GIMPLE_DEBUG:
1488 case GIMPLE_COND:
1489 gimple_set_uid (gsi_stmt (gsi), 0);
1490 break;
1491 default:
1492 return false;
1496 data_reference_p dr;
1498 innermost_DR_map
1499 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1500 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1502 /* Compute post-dominator tree locally. */
1503 region = build_region (loop);
1504 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1506 predicate_bbs (loop);
1508 /* Free post-dominator tree since it is not used after predication. */
1509 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1510 region.release ();
1512 for (i = 0; refs->iterate (i, &dr); i++)
1514 tree ref = DR_REF (dr);
1516 dr->aux = XNEW (struct ifc_dr);
1517 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1518 DR_RW_UNCONDITIONALLY (dr) = false;
1519 DR_W_UNCONDITIONALLY (dr) = false;
1520 IFC_DR (dr)->rw_predicate = boolean_false_node;
1521 IFC_DR (dr)->w_predicate = boolean_false_node;
1522 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1523 if (gimple_uid (DR_STMT (dr)) == 0)
1524 gimple_set_uid (DR_STMT (dr), i + 1);
1526 /* If DR doesn't have innermost loop behavior or it's a compound
1527 memory reference, we synthesize its innermost loop behavior
1528 for hashing. */
1529 if (TREE_CODE (ref) == COMPONENT_REF
1530 || TREE_CODE (ref) == IMAGPART_EXPR
1531 || TREE_CODE (ref) == REALPART_EXPR
1532 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1533 || DR_INIT (dr) || DR_STEP (dr)))
1535 while (TREE_CODE (ref) == COMPONENT_REF
1536 || TREE_CODE (ref) == IMAGPART_EXPR
1537 || TREE_CODE (ref) == REALPART_EXPR)
1538 ref = TREE_OPERAND (ref, 0);
1540 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1541 DR_BASE_ADDRESS (dr) = ref;
1543 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1546 for (i = 0; i < loop->num_nodes; i++)
1548 basic_block bb = ifc_bbs[i];
1549 gimple_stmt_iterator itr;
1551 /* Check the if-convertibility of statements in predicated BBs. */
1552 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1553 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1554 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1555 return false;
1558 /* Checking PHIs needs to be done after stmts, as the fact whether there
1559 are any masked loads or stores affects the tests. */
1560 for (i = 0; i < loop->num_nodes; i++)
1562 basic_block bb = ifc_bbs[i];
1563 gphi_iterator itr;
1565 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1566 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1567 return false;
1570 if (dump_file)
1571 fprintf (dump_file, "Applying if-conversion\n");
1573 return true;
1576 /* Return true when LOOP is if-convertible.
1577 LOOP is if-convertible if:
1578 - it is innermost,
1579 - it has two or more basic blocks,
1580 - it has only one exit,
1581 - loop header is not the exit edge,
1582 - if its basic blocks and phi nodes are if convertible. */
1584 static bool
1585 if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1587 edge e;
1588 edge_iterator ei;
1589 bool res = false;
1591 /* Handle only innermost loop. */
1592 if (!loop || loop->inner)
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1595 fprintf (dump_file, "not innermost loop\n");
1596 return false;
1599 /* If only one block, no need for if-conversion. */
1600 if (loop->num_nodes <= 2)
1602 if (dump_file && (dump_flags & TDF_DETAILS))
1603 fprintf (dump_file, "less than 2 basic blocks\n");
1604 return false;
1607 /* More than one loop exit is too much to handle. */
1608 if (!single_exit (loop))
1610 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, "multiple exits\n");
1612 return false;
1615 /* If one of the loop header's edge is an exit edge then do not
1616 apply if-conversion. */
1617 FOR_EACH_EDGE (e, ei, loop->header->succs)
1618 if (loop_exit_edge_p (loop, e))
1619 return false;
1621 res = if_convertible_loop_p_1 (loop, refs);
1623 delete innermost_DR_map;
1624 innermost_DR_map = NULL;
1626 delete baseref_DR_map;
1627 baseref_DR_map = NULL;
1629 return res;
1632 /* Return reduc_1 if has_nop.
1634 if (...)
1635 tmp1 = (unsigned type) reduc_1;
1636 tmp2 = tmp1 + rhs2;
1637 reduc_3 = (signed type) tmp2. */
1638 static tree
1639 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1641 if (!has_nop)
1642 return op;
1644 if (TREE_CODE (op) != SSA_NAME)
1645 return NULL_TREE;
1647 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1648 if (!stmt
1649 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1650 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1651 (gimple_assign_rhs1 (stmt))))
1652 return NULL_TREE;
1654 return gimple_assign_rhs1 (stmt);
1657 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1658 which is in predicated basic block.
1659 In fact, the following PHI pattern is searching:
1660 loop-header:
1661 reduc_1 = PHI <..., reduc_2>
1663 if (...)
1664 reduc_3 = ...
1665 reduc_2 = PHI <reduc_1, reduc_3>
1667 ARG_0 and ARG_1 are correspondent PHI arguments.
1668 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1669 EXTENDED is true if PHI has > 2 arguments. */
1671 static bool
1672 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1673 tree *op0, tree *op1, bool extended, bool* has_nop,
1674 gimple **nop_reduc)
1676 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1677 gimple *stmt;
1678 gimple *header_phi = NULL;
1679 enum tree_code reduction_op;
1680 basic_block bb = gimple_bb (phi);
1681 class loop *loop = bb->loop_father;
1682 edge latch_e = loop_latch_edge (loop);
1683 imm_use_iterator imm_iter;
1684 use_operand_p use_p;
1685 edge e;
1686 edge_iterator ei;
1687 bool result = *has_nop = false;
1688 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1689 return false;
1691 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1693 lhs = arg_1;
1694 header_phi = SSA_NAME_DEF_STMT (arg_0);
1695 stmt = SSA_NAME_DEF_STMT (arg_1);
1697 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1699 lhs = arg_0;
1700 header_phi = SSA_NAME_DEF_STMT (arg_1);
1701 stmt = SSA_NAME_DEF_STMT (arg_0);
1703 else
1704 return false;
1705 if (gimple_bb (header_phi) != loop->header)
1706 return false;
1708 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1709 return false;
1711 if (gimple_code (stmt) != GIMPLE_ASSIGN
1712 || gimple_has_volatile_ops (stmt))
1713 return false;
1715 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1716 return false;
1718 if (!is_predicated (gimple_bb (stmt)))
1719 return false;
1721 /* Check that stmt-block is predecessor of phi-block. */
1722 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1723 if (e->dest == bb)
1725 result = true;
1726 break;
1728 if (!result)
1729 return false;
1731 if (!has_single_use (lhs))
1732 return false;
1734 reduction_op = gimple_assign_rhs_code (stmt);
1736 /* Catch something like below
1738 loop-header:
1739 reduc_1 = PHI <..., reduc_2>
1741 if (...)
1742 tmp1 = (unsigned type) reduc_1;
1743 tmp2 = tmp1 + rhs2;
1744 reduc_3 = (signed type) tmp2;
1746 reduc_2 = PHI <reduc_1, reduc_3>
1748 and convert to
1750 reduc_2 = PHI <0, reduc_1>
1751 tmp1 = (unsigned type)reduc_1;
1752 ifcvt = cond_expr ? rhs2 : 0
1753 tmp2 = tmp1 +/- ifcvt;
1754 reduc_1 = (signed type)tmp2; */
1756 if (CONVERT_EXPR_CODE_P (reduction_op))
1758 lhs = gimple_assign_rhs1 (stmt);
1759 if (TREE_CODE (lhs) != SSA_NAME
1760 || !has_single_use (lhs))
1761 return false;
1763 *nop_reduc = stmt;
1764 stmt = SSA_NAME_DEF_STMT (lhs);
1765 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1766 || !is_gimple_assign (stmt))
1767 return false;
1769 *has_nop = true;
1770 reduction_op = gimple_assign_rhs_code (stmt);
1773 if (reduction_op != PLUS_EXPR
1774 && reduction_op != MINUS_EXPR
1775 && reduction_op != MULT_EXPR
1776 && reduction_op != BIT_IOR_EXPR
1777 && reduction_op != BIT_XOR_EXPR
1778 && reduction_op != BIT_AND_EXPR)
1779 return false;
1780 r_op1 = gimple_assign_rhs1 (stmt);
1781 r_op2 = gimple_assign_rhs2 (stmt);
1783 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1784 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1786 /* Make R_OP1 to hold reduction variable. */
1787 if (r_nop2 == PHI_RESULT (header_phi)
1788 && commutative_tree_code (reduction_op))
1790 std::swap (r_op1, r_op2);
1791 std::swap (r_nop1, r_nop2);
1793 else if (r_nop1 != PHI_RESULT (header_phi))
1794 return false;
1796 if (*has_nop)
1798 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1799 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1801 gimple *use_stmt = USE_STMT (use_p);
1802 if (is_gimple_debug (use_stmt))
1803 continue;
1804 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1805 continue;
1806 if (use_stmt != phi)
1807 return false;
1811 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1812 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1814 gimple *use_stmt = USE_STMT (use_p);
1815 if (is_gimple_debug (use_stmt))
1816 continue;
1817 if (use_stmt == stmt)
1818 continue;
1819 if (gimple_code (use_stmt) != GIMPLE_PHI)
1820 return false;
1823 *op0 = r_op1; *op1 = r_op2;
1824 *reduc = stmt;
1825 return true;
1828 /* Converts conditional scalar reduction into unconditional form, e.g.
1829 bb_4
1830 if (_5 != 0) goto bb_5 else goto bb_6
1831 end_bb_4
1832 bb_5
1833 res_6 = res_13 + 1;
1834 end_bb_5
1835 bb_6
1836 # res_2 = PHI <res_13(4), res_6(5)>
1837 end_bb_6
1839 will be converted into sequence
1840 _ifc__1 = _5 != 0 ? 1 : 0;
1841 res_2 = res_13 + _ifc__1;
1842 Argument SWAP tells that arguments of conditional expression should be
1843 swapped.
1844 Returns rhs of resulting PHI assignment. */
1846 static tree
1847 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1848 tree cond, tree op0, tree op1, bool swap,
1849 bool has_nop, gimple* nop_reduc)
1851 gimple_stmt_iterator stmt_it;
1852 gimple *new_assign;
1853 tree rhs;
1854 tree rhs1 = gimple_assign_rhs1 (reduc);
1855 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1856 tree c;
1857 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
1858 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1859 gimple_seq stmts = NULL;
1861 if (dump_file && (dump_flags & TDF_DETAILS))
1863 fprintf (dump_file, "Found cond scalar reduction.\n");
1864 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1867 /* Build cond expression using COND and constant operand
1868 of reduction rhs. */
1869 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1870 unshare_expr (cond),
1871 swap ? op_nochange : op1,
1872 swap ? op1 : op_nochange);
1874 /* Create assignment stmt and insert it at GSI. */
1875 new_assign = gimple_build_assign (tmp, c);
1876 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1877 /* Build rhs for unconditional increment/decrement/logic_operation. */
1878 rhs = gimple_build (&stmts, reduction_op,
1879 TREE_TYPE (rhs1), op0, tmp);
1881 if (has_nop)
1883 rhs = gimple_convert (&stmts,
1884 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1885 stmt_it = gsi_for_stmt (nop_reduc);
1886 gsi_remove (&stmt_it, true);
1887 release_defs (nop_reduc);
1889 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1891 /* Delete original reduction stmt. */
1892 stmt_it = gsi_for_stmt (reduc);
1893 gsi_remove (&stmt_it, true);
1894 release_defs (reduc);
1895 return rhs;
1898 /* Generate a simplified conditional. */
1900 static tree
1901 gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
1903 /* Check if the value is already live in a previous branch. This resolves
1904 nested conditionals from diamond PHI reductions. */
1905 if (TREE_CODE (cond) == SSA_NAME)
1907 gimple *stmt = SSA_NAME_DEF_STMT (cond);
1908 gassign *assign = NULL;
1909 if ((assign = as_a <gassign *> (stmt))
1910 && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
1912 tree arg1 = gimple_assign_rhs1 (assign);
1913 tree arg2 = gimple_assign_rhs2 (assign);
1914 if (cond_set.contains ({ arg1, 1 }))
1915 arg1 = boolean_true_node;
1916 else
1917 arg1 = gen_simplified_condition (arg1, cond_set);
1919 if (cond_set.contains ({ arg2, 1 }))
1920 arg2 = boolean_true_node;
1921 else
1922 arg2 = gen_simplified_condition (arg2, cond_set);
1924 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
1927 return cond;
1930 /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
1931 as to whether the condition is inverted. */
1933 static tree
1934 gen_phi_arg_condition (gphi *phi, vec<int> *occur, gimple_stmt_iterator *gsi,
1935 scalar_cond_masked_set_type &cond_set, bool *invert)
1937 int len;
1938 int i;
1939 tree cond = NULL_TREE;
1940 tree c;
1941 edge e;
1943 *invert = false;
1944 len = occur->length ();
1945 gcc_assert (len > 0);
1946 for (i = 0; i < len; i++)
1948 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1949 c = bb_predicate (e->src);
1950 if (is_true_predicate (c))
1952 cond = c;
1953 break;
1955 /* If we have just a single inverted predicate, signal that and
1956 instead invert the COND_EXPR arms. */
1957 if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
1959 c = TREE_OPERAND (c, 0);
1960 *invert = true;
1963 c = gen_simplified_condition (c, cond_set);
1964 c = force_gimple_operand_gsi (gsi, unshare_expr (c),
1965 true, NULL_TREE, true, GSI_SAME_STMT);
1966 if (cond != NULL_TREE)
1968 /* Must build OR expression. */
1969 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1970 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1971 NULL_TREE, true, GSI_SAME_STMT);
1973 else
1974 cond = c;
1976 /* Register the new possibly simplified conditional. When more than 2
1977 entries in a phi node we chain entries in the false branch, so the
1978 inverted condition is active. */
1979 scalar_cond_masked_key pred_cond ({ cond, 1 });
1980 if (!*invert)
1981 pred_cond.inverted_p = !pred_cond.inverted_p;
1982 cond_set.add (pred_cond);
1984 gcc_assert (cond != NULL_TREE);
1985 return cond;
1988 /* Create the smallest nested conditional possible. On pre-order we record
1989 which conditionals are live, and on post-order rewrite the chain by removing
1990 already active conditions.
1992 As an example we simplify:
1994 _7 = a_10 < 0;
1995 _21 = a_10 >= 0;
1996 _22 = a_10 < e_11(D);
1997 _23 = _21 & _22;
1998 _ifc__42 = _23 ? t_13 : 0;
1999 t_6 = _7 ? 1 : _ifc__42
2001 into
2003 _7 = a_10 < 0;
2004 _22 = a_10 < e_11(D);
2005 _ifc__42 = _22 ? t_13 : 0;
2006 t_6 = _7 ? 1 : _ifc__42;
2008 which produces better code. */
2010 static tree
2011 gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
2012 scalar_cond_masked_set_type &cond_set, tree type,
2013 hash_map<tree_operand_hash, auto_vec<int>> &phi_arg_map,
2014 gimple **res_stmt, tree lhs0, vec<tree> &args,
2015 unsigned idx)
2017 if (idx == args.length ())
2018 return args[idx - 1];
2020 vec<int> *indexes = phi_arg_map.get (args[idx - 1]);
2021 bool invert;
2022 tree cond = gen_phi_arg_condition (phi, indexes, gsi, cond_set, &invert);
2023 tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map,
2024 res_stmt, lhs0, args, idx + 1);
2026 unsigned prev = idx;
2027 unsigned curr = prev - 1;
2028 tree arg0 = args[curr];
2029 tree rhs, lhs;
2030 if (idx > 1)
2031 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2032 else
2033 lhs = lhs0;
2035 if (invert)
2036 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2037 arg1, arg0);
2038 else
2039 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2040 arg0, arg1);
2041 gassign *new_stmt = gimple_build_assign (lhs, rhs);
2042 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2043 update_stmt (new_stmt);
2044 *res_stmt = new_stmt;
2045 return lhs;
2048 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2049 This routine can handle PHI nodes with more than two arguments.
2051 For example,
2052 S1: A = PHI <x1(1), x2(5)>
2053 is converted into,
2054 S2: A = cond ? x1 : x2;
2056 The generated code is inserted at GSI that points to the top of
2057 basic block's statement list.
2058 If PHI node has more than two arguments a chain of conditional
2059 expression is produced. */
2062 static void
2063 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
2065 gimple *new_stmt = NULL, *reduc, *nop_reduc;
2066 tree rhs, res, arg0, arg1, op0, op1, scev;
2067 tree cond;
2068 unsigned int index0;
2069 edge e;
2070 basic_block bb;
2071 unsigned int i;
2072 bool has_nop;
2074 res = gimple_phi_result (phi);
2075 if (virtual_operand_p (res))
2076 return;
2078 if ((rhs = degenerate_phi_result (phi))
2079 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
2080 res))
2081 && !chrec_contains_undetermined (scev)
2082 && scev != res
2083 && (rhs = gimple_phi_arg_def (phi, 0))))
2085 if (dump_file && (dump_flags & TDF_DETAILS))
2087 fprintf (dump_file, "Degenerate phi!\n");
2088 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
2090 new_stmt = gimple_build_assign (res, rhs);
2091 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2092 update_stmt (new_stmt);
2093 return;
2096 bb = gimple_bb (phi);
2097 /* Keep track of conditionals already seen. */
2098 scalar_cond_masked_set_type cond_set;
2099 if (EDGE_COUNT (bb->preds) == 2)
2101 /* Predicate ordinary PHI node with 2 arguments. */
2102 edge first_edge, second_edge;
2103 basic_block true_bb;
2104 first_edge = EDGE_PRED (bb, 0);
2105 second_edge = EDGE_PRED (bb, 1);
2106 cond = bb_predicate (first_edge->src);
2107 cond_set.add ({ cond, 1 });
2108 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2109 std::swap (first_edge, second_edge);
2110 if (EDGE_COUNT (first_edge->src->succs) > 1)
2112 cond = bb_predicate (second_edge->src);
2113 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2114 cond = TREE_OPERAND (cond, 0);
2115 else
2116 first_edge = second_edge;
2118 else
2119 cond = bb_predicate (first_edge->src);
2121 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2122 cond = gen_simplified_condition (cond, cond_set);
2123 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2124 NULL_TREE, true, GSI_SAME_STMT);
2125 true_bb = first_edge->src;
2126 if (EDGE_PRED (bb, 1)->src == true_bb)
2128 arg0 = gimple_phi_arg_def (phi, 1);
2129 arg1 = gimple_phi_arg_def (phi, 0);
2131 else
2133 arg0 = gimple_phi_arg_def (phi, 0);
2134 arg1 = gimple_phi_arg_def (phi, 1);
2136 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2137 &op0, &op1, false, &has_nop,
2138 &nop_reduc))
2140 /* Convert reduction stmt into vectorizable form. */
2141 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2142 true_bb != gimple_bb (reduc),
2143 has_nop, nop_reduc);
2144 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2146 else
2147 /* Build new RHS using selected condition and arguments. */
2148 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2149 arg0, arg1);
2150 new_stmt = gimple_build_assign (res, rhs);
2151 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2152 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2153 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2155 new_stmt = gsi_stmt (new_gsi);
2156 update_stmt (new_stmt);
2159 if (dump_file && (dump_flags & TDF_DETAILS))
2161 fprintf (dump_file, "new phi replacement stmt\n");
2162 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2164 return;
2167 /* Create hashmap for PHI node which contain vector of argument indexes
2168 having the same value. */
2169 bool swap = false;
2170 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2171 unsigned int num_args = gimple_phi_num_args (phi);
2172 /* Vector of different PHI argument values. */
2173 auto_vec<tree> args (num_args);
2175 /* Compute phi_arg_map. */
2176 for (i = 0; i < num_args; i++)
2178 tree arg;
2180 arg = gimple_phi_arg_def (phi, i);
2181 if (!phi_arg_map.get (arg))
2182 args.quick_push (arg);
2183 phi_arg_map.get_or_insert (arg).safe_push (i);
2186 /* Determine element with max number of occurrences and complexity. Looking at only
2187 number of occurrences as a measure for complexity isn't enough as all usages can
2188 be unique but the comparisons to reach the PHI node differ per branch. */
2189 typedef std::pair <tree, std::pair <unsigned, unsigned>> ArgEntry;
2190 auto_vec<ArgEntry> argsKV;
2191 for (i = 0; i < args.length (); i++)
2193 unsigned int len = 0;
2194 for (int index : phi_arg_map.get (args[i]))
2196 edge e = gimple_phi_arg_edge (phi, index);
2197 len += get_bb_num_predicate_stmts (e->src);
2200 unsigned occur = phi_arg_map.get (args[i])->length ();
2201 if (dump_file && (dump_flags & TDF_DETAILS))
2202 fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
2203 argsKV.safe_push ({ args[i], { len, occur }});
2206 /* Sort elements based on rankings ARGS. */
2207 std::sort(argsKV.begin(), argsKV.end(), [](const ArgEntry &left,
2208 const ArgEntry &right) {
2209 return left.second < right.second;
2212 for (i = 0; i < args.length (); i++)
2213 args[i] = argsKV[i].first;
2215 /* Handle one special case when number of arguments with different values
2216 is equal 2 and one argument has the only occurrence. Such PHI can be
2217 handled as if would have only 2 arguments. */
2218 if (args.length () == 2 && phi_arg_map.get (args[0])->length () == 1)
2220 vec<int> *indexes;
2221 indexes = phi_arg_map.get (args[0]);
2222 index0 = (*indexes)[0];
2223 arg0 = args[0];
2224 arg1 = args[1];
2225 e = gimple_phi_arg_edge (phi, index0);
2226 cond = bb_predicate (e->src);
2227 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2229 swap = true;
2230 cond = TREE_OPERAND (cond, 0);
2232 /* Gimplify the condition to a valid cond-expr conditional operand. */
2233 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2234 NULL_TREE, true, GSI_SAME_STMT);
2235 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2236 &op0, &op1, true, &has_nop, &nop_reduc)))
2237 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2238 swap? arg1 : arg0,
2239 swap? arg0 : arg1);
2240 else
2242 /* Convert reduction stmt into vectorizable form. */
2243 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2244 swap,has_nop, nop_reduc);
2245 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2247 new_stmt = gimple_build_assign (res, rhs);
2248 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2249 update_stmt (new_stmt);
2251 else
2253 /* Common case. */
2254 tree type = TREE_TYPE (gimple_phi_result (phi));
2255 gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map,
2256 &new_stmt, res, args, 1);
2259 if (dump_file && (dump_flags & TDF_DETAILS))
2261 fprintf (dump_file, "new extended phi replacement stmt\n");
2262 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2266 /* Replaces in LOOP all the scalar phi nodes other than those in the
2267 LOOP->header block with conditional modify expressions. */
2269 static void
2270 predicate_all_scalar_phis (class loop *loop)
2272 basic_block bb;
2273 unsigned int orig_loop_num_nodes = loop->num_nodes;
2274 unsigned int i;
/* Walk every block of the if-converted region; the header is skipped
   because its PHIs must be preserved. */
2276 for (i = 1; i < orig_loop_num_nodes; i++)
2278 gphi *phi;
2279 gimple_stmt_iterator gsi;
2280 gphi_iterator phi_gsi;
2281 bb = ifc_bbs[i];
2283 if (bb == loop->header)
2284 continue;
2286 phi_gsi = gsi_start_phis (bb);
2287 if (gsi_end_p (phi_gsi))
2288 continue;
/* Replacement assignments are inserted right after the labels so that
   they precede all other statements of BB. */
2290 gsi = gsi_after_labels (bb);
2291 while (!gsi_end_p (phi_gsi))
2293 phi = phi_gsi.phi ();
/* Virtual PHIs are handled later when blocks are combined; only
   scalar PHIs are predicated and removed here. */
2294 if (virtual_operand_p (gimple_phi_result (phi)))
2295 gsi_next (&phi_gsi);
2296 else
2298 predicate_scalar_phi (phi, &gsi);
2299 remove_phi_node (&phi_gsi, false);
2305 /* Insert in each basic block of LOOP the statements produced by the
2306 gimplification of the predicates. */
2308 static void
2309 insert_gimplified_predicates (loop_p loop)
2311 unsigned int i;
2313 for (i = 0; i < loop->num_nodes; i++)
2315 basic_block bb = ifc_bbs[i];
2316 gimple_seq stmts;
/* An unpredicated block must not have accumulated any gimplified
   predicate statements. */
2317 if (!is_predicated (bb))
2318 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2319 if (!is_predicated (bb))
2321 /* Do not insert statements for a basic block that is not
2322 predicated. Also make sure that the predicate of the
2323 basic block is set to true. */
2324 reset_bb_predicate (bb);
2325 continue;
2328 stmts = bb_predicate_gimplified_stmts (bb);
2329 if (stmts)
2331 if (need_to_predicate)
2333 /* Insert the predicate of the BB just after the label,
2334 as the if-conversion of memory writes will use this
2335 predicate. */
2336 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2337 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2339 else
2341 /* Insert the predicate of the BB at the end of the BB
2342 as this would reduce the register pressure: the only
2343 use of this predicate will be in successor BBs. */
2344 gimple_stmt_iterator gsi = gsi_last_bb (bb);
/* Do not insert after a statement that must end the block
   (e.g. a control transfer). */
2346 if (gsi_end_p (gsi)
2347 || stmt_ends_bb_p (gsi_stmt (gsi)))
2348 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2349 else
2350 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2353 /* Once the sequence is code generated, set it to NULL. */
2354 set_bb_predicate_gimplified_stmts (bb, NULL, true);
2359 /* Helper function for predicate_statements. Returns index of existent
2360 mask if it was created for given SIZE and -1 otherwise. */
2362 static int
2363 mask_exists (int size, const vec<int> &vec)
2365 unsigned int ix;
2366 int v;
2367 FOR_EACH_VEC_ELT (vec, ix, v)
2368 if (v == size)
2369 return (int) ix;
2370 return -1;
2373 /* Helper function for predicate_statements. STMT is a memory read or
2374 write and it needs to be predicated by MASK. Return a statement
2375 that does so. */
2377 static gimple *
2378 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2380 gcall *new_stmt;
2382 tree lhs = gimple_assign_lhs (stmt);
2383 tree rhs = gimple_assign_rhs1 (stmt);
/* For a load the memory reference is the RHS; for a store it is the
   LHS. */
2384 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2385 mark_addressable (ref);
2386 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2387 true, NULL_TREE, true, GSI_SAME_STMT);
/* The second call argument encodes the alias pointer type and the
   known alignment of the reference. */
2388 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2389 get_object_alignment (ref));
2390 /* Copy points-to info if possible. */
2391 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2392 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2393 ref);
2394 if (TREE_CODE (lhs) == SSA_NAME)
/* Load: LHS = .MASK_LOAD (ADDR, PTR, MASK). */
2396 new_stmt
2397 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2398 ptr, mask);
2399 gimple_call_set_lhs (new_stmt, lhs);
2400 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2402 else
/* Store: .MASK_STORE (ADDR, PTR, MASK, RHS). */
2404 new_stmt
2405 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2406 mask, rhs);
2407 gimple_move_vops (new_stmt, stmt);
2409 gimple_call_set_nothrow (new_stmt, true);
2410 return new_stmt;
2413 /* STMT uses OP_LHS. Check whether it is equivalent to:
2415 ... = OP_MASK ? OP_LHS : X;
2417 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2418 known to have value OP_COND. */
2420 static tree
2421 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2422 tree op_lhs)
2424 gassign *assign = dyn_cast <gassign *> (stmt);
2425 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2426 return NULL_TREE;
2428 tree use_cond = gimple_assign_rhs1 (assign);
2429 tree if_true = gimple_assign_rhs2 (assign);
2430 tree if_false = gimple_assign_rhs3 (assign);
2432 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2433 && if_true == op_lhs)
2434 return if_false;
2436 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2437 return if_true;
2439 return NULL_TREE;
2442 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2443 the set of SSA names defined earlier in STMT's block. */
2445 static bool
2446 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2447 tree value)
2449 if (is_gimple_min_invariant (value))
2450 return true;
2452 if (TREE_CODE (value) == SSA_NAME)
2454 if (SSA_NAME_IS_DEFAULT_DEF (value))
2455 return true;
2457 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2458 basic_block use_bb = gimple_bb (stmt);
2459 return (def_bb == use_bb
2460 ? ssa_names->contains (value)
2461 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2464 return false;
2467 /* Helper function for predicate_statements. STMT is a potentially-trapping
2468 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2469 has value COND. Return a statement that does so. SSA_NAMES is the set of
2470 SSA names defined earlier in STMT's block. */
2472 static gimple *
2473 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2474 hash_set<tree_ssa_name_hash> *ssa_names)
2476 tree lhs = gimple_assign_lhs (stmt);
2477 tree_code code = gimple_assign_rhs_code (stmt);
2478 unsigned int nops = gimple_num_ops (stmt);
2479 internal_fn cond_fn = get_conditional_internal_fn (code);
2481 /* Construct the arguments to the conditional internal function. */
2482 auto_vec<tree, 8> args;
2483 args.safe_grow (nops + 1, true);
2484 args[0] = mask;
2485 for (unsigned int i = 1; i < nops; ++i)
2486 args[i] = gimple_op (stmt, i);
/* The trailing slot is the "else" value; it is filled in below. */
2487 args[nops] = NULL_TREE;
2489 /* Look for uses of the result to see whether they are COND_EXPRs that can
2490 be folded into the conditional call. */
2491 imm_use_iterator imm_iter;
2492 gimple *use_stmt;
2493 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2495 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2496 if (new_else && value_available_p (stmt, ssa_names, new_else))
2498 if (!args[nops])
2499 args[nops] = new_else;
2500 if (operand_equal_p (new_else, args[nops], 0))
2502 /* We have:
2504 LHS = IFN_COND (MASK, ..., ELSE);
2505 X = MASK ? LHS : ELSE;
2507 which makes X equivalent to LHS. */
2508 tree use_lhs = gimple_assign_lhs (use_stmt);
2509 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
/* No suitable "else" value was found among the uses; ask the target
   for its preferred one. */
2513 if (!args[nops])
2514 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2515 nops - 1, &args[1]);
2517 /* Create and insert the call. */
2518 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2519 gimple_call_set_lhs (new_stmt, lhs);
2520 gimple_call_set_nothrow (new_stmt, true);
2522 return new_stmt;
2525 /* Predicate each write to memory in LOOP.
2527 This function transforms control flow constructs containing memory
2528 writes of the form:
2530 | for (i = 0; i < N; i++)
2531 | if (cond)
2532 | A[i] = expr;
2534 into the following form that does not contain control flow:
2536 | for (i = 0; i < N; i++)
2537 | A[i] = cond ? expr : A[i];
2539 The original CFG looks like this:
2541 | bb_0
2542 | i = 0
2543 | end_bb_0
2545 | bb_1
2546 | if (i < N) goto bb_5 else goto bb_2
2547 | end_bb_1
2549 | bb_2
2550 | cond = some_computation;
2551 | if (cond) goto bb_3 else goto bb_4
2552 | end_bb_2
2554 | bb_3
2555 | A[i] = expr;
2556 | goto bb_4
2557 | end_bb_3
2559 | bb_4
2560 | goto bb_1
2561 | end_bb_4
2563 insert_gimplified_predicates inserts the computation of the COND
2564 expression at the beginning of the destination basic block:
2566 | bb_0
2567 | i = 0
2568 | end_bb_0
2570 | bb_1
2571 | if (i < N) goto bb_5 else goto bb_2
2572 | end_bb_1
2574 | bb_2
2575 | cond = some_computation;
2576 | if (cond) goto bb_3 else goto bb_4
2577 | end_bb_2
2579 | bb_3
2580 | cond = some_computation;
2581 | A[i] = expr;
2582 | goto bb_4
2583 | end_bb_3
2585 | bb_4
2586 | goto bb_1
2587 | end_bb_4
2589 predicate_statements is then predicating the memory write as follows:
2591 | bb_0
2592 | i = 0
2593 | end_bb_0
2595 | bb_1
2596 | if (i < N) goto bb_5 else goto bb_2
2597 | end_bb_1
2599 | bb_2
2600 | if (cond) goto bb_3 else goto bb_4
2601 | end_bb_2
2603 | bb_3
2604 | cond = some_computation;
2605 | A[i] = cond ? expr : A[i];
2606 | goto bb_4
2607 | end_bb_3
2609 | bb_4
2610 | goto bb_1
2611 | end_bb_4
2613 and finally combine_blocks removes the basic block boundaries making
2614 the loop vectorizable:
2616 | bb_0
2617 | i = 0
2618 | if (i < N) goto bb_5 else goto bb_1
2619 | end_bb_0
2621 | bb_1
2622 | cond = some_computation;
2623 | A[i] = cond ? expr : A[i];
2624 | if (i < N) goto bb_5 else goto bb_4
2625 | end_bb_1
2627 | bb_4
2628 | goto bb_1
2629 | end_bb_4
2632 static void
2633 predicate_statements (loop_p loop)
2635 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2636 auto_vec<int, 1> vect_sizes;
2637 auto_vec<tree, 1> vect_masks;
2638 hash_set<tree_ssa_name_hash> ssa_names;
2640 for (i = 1; i < orig_loop_num_nodes; i++)
2642 gimple_stmt_iterator gsi;
2643 basic_block bb = ifc_bbs[i];
2644 tree cond = bb_predicate (bb);
2645 bool swap;
2646 int index;
/* Nothing to predicate when the block executes unconditionally. */
2648 if (is_true_predicate (cond))
2649 continue;
/* Strip a top-level negation and remember it; SWAP flips the
   COND_EXPR arms (or XORs the mask) below. */
2651 swap = false;
2652 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2654 swap = true;
2655 cond = TREE_OPERAND (cond, 0);
/* Masks are cached per block; reset the caches for this block. */
2658 vect_sizes.truncate (0);
2659 vect_masks.truncate (0);
2661 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2663 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2664 tree lhs;
2665 if (!stmt)
/* A store guarded by a known-false predicate never executes;
   remove it. */
2667 else if (is_false_predicate (cond)
2668 && gimple_vdef (stmt))
2670 unlink_stmt_vdef (stmt);
2671 gsi_remove (&gsi, true);
2672 release_defs (stmt);
2673 continue;
2675 else if (gimple_plf (stmt, GF_PLF_2)
2676 && is_gimple_assign (stmt))
2678 tree lhs = gimple_assign_lhs (stmt);
2679 tree mask;
2680 gimple *new_stmt;
2681 gimple_seq stmts = NULL;
2682 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2683 /* We checked before setting GF_PLF_2 that an equivalent
2684 integer mode exists. */
2685 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2686 if (!vect_sizes.is_empty ()
2687 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2688 /* Use created mask. */
2689 mask = vect_masks[index];
2690 else
/* Materialize the predicate as a boolean mask statement. */
2692 if (COMPARISON_CLASS_P (cond))
2693 mask = gimple_build (&stmts, TREE_CODE (cond),
2694 boolean_type_node,
2695 TREE_OPERAND (cond, 0),
2696 TREE_OPERAND (cond, 1));
2697 else
2698 mask = cond;
/* Invert the mask via XOR with true when the original
   predicate was negated. */
2700 if (swap)
2702 tree true_val
2703 = constant_boolean_node (true, TREE_TYPE (mask));
2704 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2705 TREE_TYPE (mask), mask, true_val);
2707 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2709 /* Save mask and its size for further use. */
2710 vect_sizes.safe_push (bitsize);
2711 vect_masks.safe_push (mask);
/* Single-rhs assignments here are memory accesses and become
   masked loads/stores; other codes become conditional internal
   function calls. */
2713 if (gimple_assign_single_p (stmt))
2714 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2715 else
2716 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2718 gsi_replace (&gsi, new_stmt, true);
/* Arithmetic with undefined signed overflow must be rewritten
   to a well-defined form before it can execute unconditionally. */
2720 else if (((lhs = gimple_assign_lhs (stmt)), true)
2721 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2722 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2723 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2724 && arith_code_with_undefined_signed_overflow
2725 (gimple_assign_rhs_code (stmt)))
2727 gsi_remove (&gsi, true)
2728 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2729 bool first = true;
2730 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2731 !gsi_end_p (gsi2);)
2733 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2734 gsi_remove (&gsi2, false);
2735 if (first)
2737 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2738 first = false;
2740 else
2741 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
/* Any remaining store is predicated by re-writing the old value:
   lhs = COND ? rhs : lhs. */
2744 else if (gimple_vdef (stmt))
2746 tree lhs = gimple_assign_lhs (stmt);
2747 tree rhs = gimple_assign_rhs1 (stmt);
2748 tree type = TREE_TYPE (lhs);
2750 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2751 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2752 if (swap)
2753 std::swap (lhs, rhs);
2754 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2755 NULL_TREE, true, GSI_SAME_STMT);
2756 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2757 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2758 update_stmt (stmt);
2761 if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
2762 && is_gimple_call (gsi_stmt (gsi)))
2764 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2765 This will cause the vectorizer to match the "in branch"
2766 clone variants, and serves to build the mask vector
2767 in a natural way. */
2768 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2769 tree orig_fn = gimple_call_fn (call);
2770 int orig_nargs = gimple_call_num_args (call);
2771 auto_vec<tree> args;
2772 args.safe_push (orig_fn);
2773 for (int i = 0; i < orig_nargs; i++)
2774 args.safe_push (gimple_call_arg (call, i));
2775 args.safe_push (cond);
2777 /* Replace the call with a IFN_MASK_CALL that has the extra
2778 condition parameter. */
2779 gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
2780 args);
2781 gimple_call_set_lhs (new_call, gimple_call_lhs (call));
2782 gsi_replace (&gsi, new_call, true);
/* Record definitions so value_available_p can check same-block
   availability of "else" values. */
2785 lhs = gimple_get_lhs (gsi_stmt (gsi));
2786 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2787 ssa_names.add (lhs);
2788 gsi_next (&gsi);
2790 ssa_names.empty ();
2794 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2795 other than the exit and latch of the LOOP. Also resets the
2796 GIMPLE_DEBUG information. */
2798 static void
2799 remove_conditions_and_labels (loop_p loop)
2801 gimple_stmt_iterator gsi;
2802 unsigned int i;
2804 for (i = 0; i < loop->num_nodes; i++)
2806 basic_block bb = ifc_bbs[i];
2808 if (bb_with_exit_edge_p (loop, bb)
2809 || bb == loop->latch)
2810 continue;
2812 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2813 switch (gimple_code (gsi_stmt (gsi)))
2815 case GIMPLE_COND:
2816 case GIMPLE_LABEL:
2817 gsi_remove (&gsi, true);
2818 break;
2820 case GIMPLE_DEBUG:
2821 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2822 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2824 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2825 update_stmt (gsi_stmt (gsi));
2827 gsi_next (&gsi);
2828 break;
2830 default:
2831 gsi_next (&gsi);
2836 /* Combine all the basic blocks from LOOP into one or two super basic
2837 blocks. Replace PHI nodes with conditional modify expressions. */
2839 static void
2840 combine_blocks (class loop *loop)
2842 basic_block bb, exit_bb, merge_target_bb;
2843 unsigned int orig_loop_num_nodes = loop->num_nodes;
2844 unsigned int i;
2845 edge e;
2846 edge_iterator ei;
2848 remove_conditions_and_labels (loop);
2849 insert_gimplified_predicates (loop);
2850 predicate_all_scalar_phis (loop);
2852 if (need_to_predicate || need_to_rewrite_undefined)
2853 predicate_statements (loop);
2855 /* Merge basic blocks. */
2856 exit_bb = NULL;
/* Record which blocks were predicated before the predicates are
   freed; used below to reset flow-sensitive info. */
2857 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2858 for (i = 0; i < orig_loop_num_nodes; i++)
2860 bb = ifc_bbs[i];
2861 predicated[i] = !is_true_predicate (bb_predicate (bb));
2862 free_bb_predicate (bb);
2863 if (bb_with_exit_edge_p (loop, bb))
2865 gcc_assert (exit_bb == NULL);
2866 exit_bb = bb;
2869 gcc_assert (exit_bb != loop->latch);
2871 merge_target_bb = loop->header;
2873 /* Get at the virtual def valid for uses starting at the first block
2874 we merge into the header. Without a virtual PHI the loop has the
2875 same virtual use on all stmts. */
2876 gphi *vphi = get_virtual_phi (loop->header);
2877 tree last_vdef = NULL_TREE;
2878 if (vphi)
2880 last_vdef = gimple_phi_result (vphi);
2881 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2882 ! gsi_end_p (gsi); gsi_next (&gsi))
2883 if (gimple_vdef (gsi_stmt (gsi)))
2884 last_vdef = gimple_vdef (gsi_stmt (gsi));
2886 for (i = 1; i < orig_loop_num_nodes; i++)
2888 gimple_stmt_iterator gsi;
2889 gimple_stmt_iterator last;
2891 bb = ifc_bbs[i];
2893 if (bb == exit_bb || bb == loop->latch)
2894 continue;
2896 /* We release virtual PHIs late because we have to propagate them
2897 out using the current VUSE. The def might be the one used
2898 after the loop. */
2899 vphi = get_virtual_phi (bb);
2900 if (vphi)
2902 /* When there's just loads inside the loop a stray virtual
2903 PHI merging the uses can appear, update last_vdef from
2904 it. */
2905 if (!last_vdef)
2906 last_vdef = gimple_phi_arg_def (vphi, 0)
2907 imm_use_iterator iter;
2908 use_operand_p use_p;
2909 gimple *use_stmt;
2910 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2912 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2913 SET_USE (use_p, last_vdef);
2915 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2916 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2917 gsi = gsi_for_stmt (vphi);
2918 remove_phi_node (&gsi, true);
2921 /* Make stmts member of loop->header and clear range info from all stmts
2922 in BB which is now no longer executed conditional on a predicate we
2923 could have derived it from. */
2924 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2926 gimple *stmt = gsi_stmt (gsi);
2927 gimple_set_bb (stmt, merge_target_bb);
2928 /* Update virtual operands. */
2929 if (last_vdef)
2931 use_operand_p use_p = ssa_vuse_operand (stmt);
2932 if (use_p
2933 && USE_FROM_PTR (use_p) != last_vdef)
2934 SET_USE (use_p, last_vdef);
2935 if (gimple_vdef (stmt))
2936 last_vdef = gimple_vdef (stmt);
2938 else
2939 /* If this is the first load we arrive at update last_vdef
2940 so we handle stray PHIs correctly. */
2941 last_vdef = gimple_vuse (stmt);
2942 if (predicated[i])
2944 ssa_op_iter i;
2945 tree op;
2946 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2947 reset_flow_sensitive_info (op);
2951 /* Update stmt list. */
2952 last = gsi_last_bb (merge_target_bb);
2953 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2954 set_bb_seq (bb, NULL);
2957 /* Fixup virtual operands in the exit block. */
2958 if (exit_bb
2959 && exit_bb != loop->header)
2961 /* We release virtual PHIs late because we have to propagate them
2962 out using the current VUSE. The def might be the one used
2963 after the loop. */
2964 vphi = get_virtual_phi (exit_bb);
2965 if (vphi)
2967 /* When there's just loads inside the loop a stray virtual
2968 PHI merging the uses can appear, update last_vdef from
2969 it. */
2970 if (!last_vdef)
2971 last_vdef = gimple_phi_arg_def (vphi, 0);
2972 imm_use_iterator iter;
2973 use_operand_p use_p;
2974 gimple *use_stmt;
2975 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2977 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2978 SET_USE (use_p, last_vdef);
2980 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2981 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2982 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2983 remove_phi_node (&gsi, true);
2987 /* Now remove all the edges in the loop, except for those from the exit
2988 block and delete the blocks we elided. */
2989 for (i = 1; i < orig_loop_num_nodes; i++)
2991 bb = ifc_bbs[i];
2993 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2995 if (e->src == exit_bb)
2996 ei_next (&ei);
2997 else
2998 remove_edge (e);
3001 for (i = 1; i < orig_loop_num_nodes; i++)
3003 bb = ifc_bbs[i];
3005 if (bb == exit_bb || bb == loop->latch)
3006 continue;
3008 delete_basic_block (bb);
3011 /* Re-connect the exit block. */
3012 if (exit_bb != NULL)
3014 if (exit_bb != loop->header)
3016 /* Connect this node to loop header. */
3017 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
3018 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
3021 /* Redirect non-exit edges to loop->latch. */
3022 FOR_EACH_EDGE (e, ei, exit_bb->succs)
3024 if (!loop_exit_edge_p (loop, e))
3025 redirect_edge_and_branch (e, loop->latch);
3027 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
3029 else
3031 /* If the loop does not have an exit, reconnect header and latch. */
3032 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
3033 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
3036 /* If possible, merge loop header to the block with the exit edge.
3037 This reduces the number of basic blocks to two, to please the
3038 vectorizer that handles only loops with two nodes. */
3039 if (exit_bb
3040 && exit_bb != loop->header)
3042 if (can_merge_blocks_p (loop->header, exit_bb))
3043 merge_blocks (loop->header, exit_bb);
3046 free (ifc_bbs);
3047 ifc_bbs = NULL;
3048 free (predicated);
3051 /* Version LOOP before if-converting it; the original loop
3052 will be if-converted, the new copy of the loop will not,
3053 and the LOOP_VECTORIZED internal call will be guarding which
3054 loop to execute. The vectorizer pass will fold this
3055 internal call into either true or false.
3057 Note that this function intentionally invalidates profile. Both edges
3058 out of LOOP_VECTORIZED must have 100% probability so the profile remains
3059 consistent after the condition is folded in the vectorizer. */
3061 static class loop *
3062 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
3064 basic_block cond_bb;
3065 tree cond = make_ssa_name (boolean_type_node);
3066 class loop *new_loop;
3067 gimple *g;
3068 gimple_stmt_iterator gsi;
3069 unsigned int save_length = 0;
/* The second argument (the non-vectorized copy's number) is a
   placeholder here; it is patched below once NEW_LOOP exists. */
3071 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
3072 build_int_cst (integer_type_node, loop->num),
3073 integer_zero_node);
3074 gimple_call_set_lhs (g, cond);
3076 void **saved_preds = NULL;
3077 if (any_complicated_phi || need_to_predicate)
3079 /* Save BB->aux around loop_version as that uses the same field. */
3080 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
3081 saved_preds = XALLOCAVEC (void *, save_length);
3082 for (unsigned i = 0; i < save_length; i++)
3083 saved_preds[i] = ifc_bbs[i]->aux;
3086 initialize_original_copy_tables ();
3087 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
3088 is re-merged in the vectorizer. */
3089 new_loop = loop_version (loop, cond, &cond_bb,
3090 profile_probability::always (),
3091 profile_probability::always (),
3092 profile_probability::always (),
3093 profile_probability::always (), true);
3094 free_original_copy_tables ();
/* Restore the BB->aux predicates that loop_version clobbered. */
3096 if (any_complicated_phi || need_to_predicate)
3097 for (unsigned i = 0; i < save_length; i++)
3098 ifc_bbs[i]->aux = saved_preds[i];
3100 if (new_loop == NULL)
3101 return NULL;
3103 new_loop->dont_vectorize = true;
3104 new_loop->force_vectorize = false;
3105 gsi = gsi_last_bb (cond_bb);
3106 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
3107 if (preds)
3108 preds->safe_push (g);
3109 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3110 update_ssa (TODO_update_ssa_no_phi);
3111 return new_loop;
3114 /* Return true when LOOP satisfies the follow conditions that will
3115 allow it to be recognized by the vectorizer for outer-loop
3116 vectorization:
3117 - The loop is not the root node of the loop tree.
3118 - The loop has exactly one inner loop.
3119 - The loop has a single exit.
3120 - The loop header has a single successor, which is the inner
3121 loop header.
3122 - Each of the inner and outer loop latches have a single
3123 predecessor.
3124 - The loop exit block has a single predecessor, which is the
3125 inner loop's exit block. */
3127 static bool
3128 versionable_outer_loop_p (class loop *loop)
3130 if (!loop_outer (loop)
3131 || loop->dont_vectorize
3132 || !loop->inner
3133 || loop->inner->next
3134 || !single_exit (loop)
3135 || !single_succ_p (loop->header)
3136 || single_succ (loop->header) != loop->inner->header
3137 || !single_pred_p (loop->latch)
3138 || !single_pred_p (loop->inner->latch))
3139 return false;
3141 basic_block outer_exit = single_pred (loop->latch);
3142 basic_block inner_exit = single_pred (loop->inner->latch);
3144 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3145 return false;
3147 if (dump_file)
3148 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3150 return true;
3153 /* Performs splitting of critical edges. Skip splitting and return false
3154 if LOOP will not be converted because:
3156 - LOOP is not well formed.
3157 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3159 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3161 static bool
3162 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3164 basic_block *body;
3165 basic_block bb;
3166 unsigned int num = loop->num_nodes;
3167 unsigned int i;
3168 edge e;
3169 edge_iterator ei;
3170 auto_vec<edge> critical_edges;
3172 /* Loop is not well formed. */
3173 if (loop->inner)
3174 return false;
/* First pass: reject over-complicated PHIs and collect the critical
   successor edges of conditional blocks. */
3176 body = get_loop_body (loop);
3177 for (i = 0; i < num; i++)
3179 bb = body[i];
3180 if (!aggressive_if_conv
3181 && phi_nodes (bb)
3182 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3185 fprintf (dump_file,
3186 "BB %d has complicated PHI with more than %u args.\n",
3187 bb->index, MAX_PHI_ARG_NUM);
3189 free (body);
3190 return false;
3192 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3193 continue;
3195 /* Skip basic blocks not ending with conditional branch. */
3196 if (!safe_is_a <gcond *> (*gsi_last_bb (bb)))
3197 continue;
/* Only edges staying within LOOP are candidates for splitting. */
3199 FOR_EACH_EDGE (e, ei, bb->succs)
3200 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3201 critical_edges.safe_push (e);
3203 free (body);
/* Second pass: split the collected edges. */
3205 while (critical_edges.length () > 0)
3207 e = critical_edges.pop ();
3208 /* Don't split if bb can be predicated along non-critical edge. */
3209 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3210 split_edge (e);
3213 return true;
3216 /* Delete redundant statements produced by predication which prevents
3217 loop vectorization. */
3219 static void
3220 ifcvt_local_dce (class loop *loop)
3222 gimple *stmt;
3223 gimple *stmt1;
3224 gimple *phi;
3225 gimple_stmt_iterator gsi;
3226 auto_vec<gimple *> worklist;
3227 enum gimple_code code;
3228 use_operand_p use_p;
3229 imm_use_iterator imm_iter;
3231 /* The loop has a single BB only. */
3232 basic_block bb = loop->header;
3233 tree latch_vdef = NULL_TREE;
/* GF_PLF_2 is used below as the "live" mark on statements. */
3235 worklist.create (64);
3236 /* Consider all phi as live statements. */
3237 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3239 phi = gsi_stmt (gsi);
3240 gimple_set_plf (phi, GF_PLF_2, true);
3241 worklist.safe_push (phi);
/* Remember the virtual def flowing in from the latch; it seeds
   the dead-store classification below. */
3242 if (virtual_operand_p (gimple_phi_result (phi)))
3243 latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3245 /* Consider load/store statements, CALL and COND as live. */
3246 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3248 stmt = gsi_stmt (gsi);
3249 if (is_gimple_debug (stmt))
3251 gimple_set_plf (stmt, GF_PLF_2, true);
3252 continue;
3254 if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3256 gimple_set_plf (stmt, GF_PLF_2, true);
3257 worklist.safe_push (stmt);
3258 continue;
3260 code = gimple_code (stmt);
3261 if (code == GIMPLE_COND || code == GIMPLE_CALL)
3263 gimple_set_plf (stmt, GF_PLF_2, true);
3264 worklist.safe_push (stmt);
3265 continue;
3267 gimple_set_plf (stmt, GF_PLF_2, false);
/* An assignment whose result is used outside BB (ignoring debug
   uses) must also be kept. */
3269 if (code == GIMPLE_ASSIGN)
3271 tree lhs = gimple_assign_lhs (stmt);
3272 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3274 stmt1 = USE_STMT (use_p);
3275 if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3277 gimple_set_plf (stmt, GF_PLF_2, true);
3278 worklist.safe_push (stmt);
3279 break;
3284 /* Propagate liveness through arguments of live stmt. */
3285 while (worklist.length () > 0)
3287 ssa_op_iter iter;
3288 use_operand_p use_p;
3289 tree use;
3291 stmt = worklist.pop ();
3292 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3294 use = USE_FROM_PTR (use_p);
3295 if (TREE_CODE (use) != SSA_NAME)
3296 continue;
3297 stmt1 = SSA_NAME_DEF_STMT (use);
3298 if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3299 continue;
3300 gimple_set_plf (stmt1, GF_PLF_2, true);
3301 worklist.safe_push (stmt1);
3304 /* Delete dead statements. */
/* Walk backwards so removing a statement does not disturb the
   iterator for the ones still to visit. */
3305 gsi = gsi_last_bb (bb);
3306 while (!gsi_end_p (gsi))
3308 gimple_stmt_iterator gsiprev = gsi;
3309 gsi_prev (&gsiprev);
3310 stmt = gsi_stmt (gsi);
3311 if (gimple_store_p (stmt) && gimple_vdef (stmt))
3313 tree lhs = gimple_get_lhs (stmt);
3314 ao_ref write;
3315 ao_ref_init (&write, lhs);
/* Stores are live above, but a provably dead store can still be
   removed via DSE classification. */
3317 if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3318 == DSE_STORE_DEAD)
3319 delete_dead_or_redundant_assignment (&gsi, "dead");
3320 gsi = gsiprev;
3321 continue;
3324 if (gimple_plf (stmt, GF_PLF_2))
3326 gsi = gsiprev;
3327 continue;
3329 if (dump_file && (dump_flags & TDF_DETAILS))
3331 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3332 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3334 gsi_remove (&gsi, true);
3335 release_defs (stmt);
3336 gsi = gsiprev;
3340 /* Return true if VALUE is already available on edge PE. */
3342 static bool
3343 ifcvt_available_on_edge_p (edge pe, tree value)
3345 if (is_gimple_min_invariant (value))
3346 return true;
3348 if (TREE_CODE (value) == SSA_NAME)
3350 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3351 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3352 return true;
3355 return false;
3358 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3359 edge PE. */
3361 static bool
3362 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3364 if (auto *call = dyn_cast<gcall *> (stmt))
3366 if (gimple_call_internal_p (call)
3367 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3368 return false;
3370 else if (auto *assign = dyn_cast<gassign *> (stmt))
3372 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3373 return false;
3375 else
3376 return false;
3378 if (gimple_has_side_effects (stmt)
3379 || gimple_could_trap_p (stmt)
3380 || stmt_could_throw_p (cfun, stmt)
3381 || gimple_vdef (stmt)
3382 || gimple_vuse (stmt))
3383 return false;
3385 int num_args = gimple_num_args (stmt);
3386 if (pe != loop_preheader_edge (loop))
3388 for (int i = 0; i < num_args; ++i)
3389 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3390 return false;
3392 else
3394 for (int i = 0; i < num_args; ++i)
3395 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3396 return false;
3399 return true;
3402 /* Hoist invariant statements from LOOP to edge PE. */
3404 static void
3405 ifcvt_hoist_invariants (class loop *loop, edge pe)
3407 gimple_stmt_iterator hoist_gsi = {};
3408 unsigned int num_blocks = loop->num_nodes;
3409 basic_block *body = get_loop_body (loop);
3410 for (unsigned int i = 0; i < num_blocks; ++i)
3411 for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
3413 gimple *stmt = gsi_stmt (gsi);
3414 if (ifcvt_can_hoist (loop, pe, stmt))
3416 /* Once we've hoisted one statement, insert other statements
3417 after it. */
3418 gsi_remove (&gsi, false);
3419 if (hoist_gsi.ptr)
3420 gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3421 else
3423 gsi_insert_on_edge_immediate (pe, stmt);
3424 hoist_gsi = gsi_for_stmt (stmt);
3426 continue;
3428 gsi_next (&gsi);
3430 free (body);
3433 /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3434 type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3435 value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3436 if not NULL, will hold the tree representing the base struct of this
3437 bitfield. */
3439 static tree
3440 get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3441 tree *struct_expr)
3443 tree comp_ref = write ? gimple_assign_lhs (stmt)
3444 : gimple_assign_rhs1 (stmt);
3446 tree field_decl = TREE_OPERAND (comp_ref, 1);
3447 tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3449 /* Bail out if the representative is not a suitable type for a scalar
3450 register variable. */
3451 if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
3452 return NULL_TREE;
3454 /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3455 precision. */
3456 unsigned HOST_WIDE_INT bf_prec
3457 = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3458 if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3459 return NULL_TREE;
3461 if (struct_expr)
3462 *struct_expr = TREE_OPERAND (comp_ref, 0);
3464 if (bitpos)
3466 /* To calculate the bitposition of the BITFIELD_REF we have to determine
3467 where our bitfield starts in relation to the container REP_DECL. The
3468 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3469 us how many bytes from the start of the structure there are until the
3470 start of the group of bitfield members the FIELD_DECL belongs to,
3471 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3472 position our actual bitfield member starts. For the container
3473 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3474 us the distance between the start of the structure and the start of
3475 the container, though the first is in bytes and the later other in
3476 bits. With this in mind we calculate the bit position of our new
3477 BITFIELD_REF by subtracting the number of bits between the start of
3478 the structure and the container from the number of bits from the start
3479 of the structure and the actual bitfield member. */
3480 tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3481 DECL_FIELD_OFFSET (field_decl),
3482 build_int_cst (bitsizetype, BITS_PER_UNIT));
3483 bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3484 DECL_FIELD_BIT_OFFSET (field_decl));
3485 tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3486 DECL_FIELD_OFFSET (rep_decl),
3487 build_int_cst (bitsizetype, BITS_PER_UNIT));
3488 rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3489 DECL_FIELD_BIT_OFFSET (rep_decl));
3491 *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3494 return rep_decl;
3498 /* Lowers the bitfield described by DATA.
3499 For a write like:
3501 struct.bf = _1;
3503 lower to:
3505 __ifc_1 = struct.<representative>;
3506 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3507 struct.<representative> = __ifc_2;
3509 For a read:
3511 _1 = struct.bf;
3513 lower to:
3515 __ifc_1 = struct.<representative>;
3516 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3518 where representative is a legal load that contains the bitfield value,
3519 bitsize is the size of the bitfield and bitpos the offset to the start of
3520 the bitfield within the representative. */
3522 static void
3523 lower_bitfield (gassign *stmt, bool write)
3525 tree struct_expr;
3526 tree bitpos;
3527 tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
3528 tree rep_type = TREE_TYPE (rep_decl);
3529 tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
3531 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3532 if (dump_file && (dump_flags & TDF_DETAILS))
3534 fprintf (dump_file, "Lowering:\n");
3535 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3536 fprintf (dump_file, "to:\n");
3539 /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3540 defining SSA_NAME. */
3541 tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
3542 NULL_TREE);
3543 tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3546 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3548 if (write)
3550 new_val = ifc_temp_var (rep_type,
3551 build3 (BIT_INSERT_EXPR, rep_type, new_val,
3552 unshare_expr (gimple_assign_rhs1 (stmt)),
3553 bitpos), &gsi);
3555 if (dump_file && (dump_flags & TDF_DETAILS))
3556 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3558 gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
3559 new_val);
3560 gimple_move_vops (new_stmt, stmt);
3561 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3563 if (dump_file && (dump_flags & TDF_DETAILS))
3564 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3566 else
3568 tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
3569 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
3570 bitpos);
3571 new_val = ifc_temp_var (bf_type, bfr, &gsi);
3573 gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
3574 new_val);
3575 gimple_move_vops (new_stmt, stmt);
3576 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3578 if (dump_file && (dump_flags & TDF_DETAILS))
3579 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3582 gsi_remove (&gsi, true);
3585 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3586 with data structures representing these bitfields. */
3588 static bool
3589 bitfields_to_lower_p (class loop *loop,
3590 vec <gassign *> &reads_to_lower,
3591 vec <gassign *> &writes_to_lower)
3593 gimple_stmt_iterator gsi;
3595 if (dump_file && (dump_flags & TDF_DETAILS))
3597 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3600 for (unsigned i = 0; i < loop->num_nodes; ++i)
3602 basic_block bb = ifc_bbs[i];
3603 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3605 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3606 if (!stmt)
3607 continue;
3609 tree op = gimple_assign_lhs (stmt);
3610 bool write = TREE_CODE (op) == COMPONENT_REF;
3612 if (!write)
3613 op = gimple_assign_rhs1 (stmt);
3615 if (TREE_CODE (op) != COMPONENT_REF)
3616 continue;
3618 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3620 if (dump_file && (dump_flags & TDF_DETAILS))
3621 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3623 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3625 if (dump_file && (dump_flags & TDF_DETAILS))
3626 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3627 " field type is not Integral.\n");
3628 return false;
3631 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3633 if (dump_file && (dump_flags & TDF_DETAILS))
3634 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3635 " representative is BLKmode.\n");
3636 return false;
3639 if (dump_file && (dump_flags & TDF_DETAILS))
3640 fprintf (dump_file, "\tBitfield OK to lower.\n");
3641 if (write)
3642 writes_to_lower.safe_push (stmt);
3643 else
3644 reads_to_lower.safe_push (stmt);
3648 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  */

unsigned int
tree_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  class loop *rloop;
  auto_vec <gassign *, 4> reads_to_lower;
  auto_vec <gassign *, 4> writes_to_lower;
  bitmap exit_bbs;
  edge pe;
  auto_vec<data_reference_p, 10> refs;

  /* Restart point: when outer-loop versioning creates a non-vectorized
     inner loop copy (RLOOP below), we loop back here to if-convert it
     as well.  */
 again:
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_lower_bitfields = false;
  need_to_ifcvt = false;
  need_to_predicate = false;
  need_to_rewrite_undefined = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      class loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  if (!single_exit (loop))
    goto cleanup;

  /* If there are more than two BBs in the loop then there is at least one if
     to convert.  */
  if (loop->num_nodes > 2
      && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      goto cleanup;
    }

  if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
    goto cleanup;

  if (loop->num_nodes > 2)
    {
      /* More than two blocks means there is a conditional to convert.  */
      need_to_ifcvt = true;

      if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
	goto cleanup;

      /* Predication and complicated PHIs are only worthwhile when the
	 result is going to be vectorized.  */
      if ((need_to_predicate || any_complicated_phi)
	  && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
	      || loop->dont_vectorize))
	goto cleanup;
    }

  /* Bitfield lowering is only done in service of vectorization.  */
  if ((flag_tree_loop_vectorize || loop->force_vectorize)
      && !loop->dont_vectorize)
    need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
						    writes_to_lower);

  if (!need_to_ifcvt && !need_to_lower_bitfields)
    goto cleanup;

  /* The edge to insert invariant stmts on.  */
  pe = loop_preheader_edge (loop);

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_lower_bitfields
      || need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      class loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      class loop *nloop = version_loop_for_if_conversion (vloop, preds);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
      else
	/* If we versioned loop then make sure to insert invariant
	   stmts before the .LOOP_VECTORIZED check since the vectorizer
	   will re-use that for things like runtime alias versioning
	   whose condition can end up using those invariants.  */
	pe = single_pred_edge (gimple_bb (preds->last ()));
    }

  if (need_to_lower_bitfields)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "-------------------------\n");
	  fprintf (dump_file, "Start lowering bitfields\n");
	}
      while (!reads_to_lower.is_empty ())
	lower_bitfield (reads_to_lower.pop (), false);
      while (!writes_to_lower.is_empty ())
	lower_bitfield (writes_to_lower.pop (), true);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Done lowering bitfields\n");
	  fprintf (dump_file, "-------------------------\n");
	}
    }
  if (need_to_ifcvt)
    {
      /* Now all statements are if-convertible.  Combine all the basic
	 blocks into one huge basic block doing the if-conversion
	 on-the-fly.  */
      combine_blocks (loop);
    }

  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
     and stores are involved.  CSE only the loop body, not the entry
     PHIs, those are to be kept in sync with the non-if-converted copy.
     ???  We'll still keep dead stores though.  */
  exit_bbs = BITMAP_ALLOC (NULL);
  bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
  bitmap_set_bit (exit_bbs, loop->latch->index);

  std::pair <tree, tree> *name_pair;
  unsigned ssa_names_idx;
  FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop);
  BITMAP_FREE (exit_bbs);

  ifcvt_hoist_invariants (loop, pe);

  todo |= TODO_cleanup_cfg;

 cleanup:
  /* Free the data references and their ifcvt auxiliary data collected
     for this loop; REFS is reused when we restart for RLOOP.  */
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    {
      free (dr->aux);
      free_data_ref (dr);
    }
  refs.truncate (0);

  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  if (rloop != NULL)
    {
      /* Process the inner loop of the non-vectorized copy created by
	 outer-loop versioning above.  */
      loop = rloop;
      reads_to_lower.truncate (0);
      writes_to_lower.truncate (0);
      goto again;
    }

  return todo;
}
3859 /* Tree if-conversion pass management. */
3861 namespace {
/* Pass metadata for the "ifcvt" GIMPLE pass.  */
const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* The tree if-conversion pass object; gate and execute are defined
   below.  */
class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override;

}; // class pass_if_conversion
3889 bool
3890 pass_if_conversion::gate (function *fun)
3892 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3893 && flag_tree_loop_if_convert != 0)
3894 || flag_tree_loop_if_convert == 1);
/* Run if-conversion on every candidate loop in FUN and perform the
   resulting IL updates.  Always returns 0; todo flags are handled
   internally.  */

unsigned int
pass_if_conversion::execute (function *fun)
{
  unsigned todo = 0;

  if (number_of_loops (fun) <= 1)
    return 0;

  /* PREDS collects the .LOOP_VECTORIZED internal calls created by loop
     versioning, to be folded below if their loop vanished.  */
  auto_vec<gimple *> preds;
  for (auto loop : loops_list (cfun, 0))
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop, &preds);

  if (todo)
    {
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  /* The pass must leave no stale bb->aux pointers behind.  */
  if (flag_checking)
    {
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  /* Perform IL update now, it might elide some loops.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      if (need_ssa_update_p (fun))
	todo |= TODO_update_ssa;
    }
  if (todo & TODO_update_ssa_any)
    update_ssa (todo & TODO_update_ssa_any);

  /* If if-conversion elided the loop fall back to the original one.  */
  for (unsigned i = 0; i < preds.length (); ++i)
    {
      gimple *g = preds[i];
      /* Skip calls whose block was already removed.  */
      if (!gimple_bb (g))
	continue;
      unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
      unsigned orig_loop = tree_to_uhwi (gimple_call_arg (g, 1));
      if (!get_loop (fun, ifcvt_loop) || !get_loop (fun, orig_loop))
	{
	  if (dump_file)
	    fprintf (dump_file, "If-converted loop vanished\n");
	  /* Resolve the versioning condition to false so only the
	     original copy survives.  */
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }

  return 0;
}
3954 } // anon namespace
/* Factory for the if-conversion pass, referenced from passes.def.  */

gimple_opt_pass *
make_pass_if_conversion (gcc::context *ctxt)
{
  return new pass_if_conversion (ctxt);
}