rust: build failure after NON_DEPENDENT_EXPR removal [PR111899]
[official-gcc.git] / gcc / tree-if-conv.cc
blob262765139ff3a8b8d3e41300f463a6921608d1a6
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "expr.h"
95 #include "optabs-tree.h"
96 #include "gimple-pretty-print.h"
97 #include "alias.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
107 #include "cfgloop.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
114 #include "dbgcnt.h"
115 #include "tree-hash-traits.h"
116 #include "varasm.h"
117 #include "builtins.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
125 #include "tree-eh.h"
126 #include "cgraph.h"
128 /* For lang_hooks.types.type_for_mode. */
129 #include "langhooks.h"
/* Only handle PHIs with no more arguments unless we are asked to by
   simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any ifcvting to be done.  */
static bool need_to_ifcvt;
/* Hash for struct innermost_loop_behavior.  It depends on the user to
   free the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  /* Hash combines the base address, offset, init and step expressions.  */
  static inline hashval_t hash (const value_type &);
  /* Equality compares the same four components with operand_equal_p.  */
  static inline bool equal (const value_type &,
			    const compare_type &);
};
169 inline hashval_t
170 innermost_loop_behavior_hash::hash (const value_type &e)
172 hashval_t hash;
174 hash = iterative_hash_expr (e->base_address, 0);
175 hash = iterative_hash_expr (e->offset, hash);
176 hash = iterative_hash_expr (e->init, hash);
177 return iterative_hash_expr (e->step, hash);
180 inline bool
181 innermost_loop_behavior_hash::equal (const value_type &e1,
182 const compare_type &e2)
184 if ((e1->base_address && !e2->base_address)
185 || (!e1->base_address && e2->base_address)
186 || (!e1->offset && e2->offset)
187 || (e1->offset && !e2->offset)
188 || (!e1->init && e2->init)
189 || (e1->init && !e2->init)
190 || (!e1->step && e2->step)
191 || (e1->step && !e2->step))
192 return false;
194 if (e1->base_address && e2->base_address
195 && !operand_equal_p (e1->base_address, e2->base_address, 0))
196 return false;
197 if (e1->offset && e2->offset
198 && !operand_equal_p (e1->offset, e2->offset, 0))
199 return false;
200 if (e1->init && e2->init
201 && !operand_equal_p (e1->init, e2->init, 0))
202 return false;
203 if (e1->step && e2->step
204 && !operand_equal_p (e1->step, e2->step, 0))
205 return false;
207 return true;
/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the
   second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;
/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */

struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;

  /* Records the number of statements recorded into
     PREDICATE_GIMPLIFIED_STMTS.  */
  unsigned no_predicate_stmts;
};
240 /* Returns true when the basic block BB has a predicate. */
242 static inline bool
243 bb_has_predicate (basic_block bb)
245 return bb->aux != NULL;
248 /* Returns the gimplified predicate for basic block BB. */
250 static inline tree
251 bb_predicate (basic_block bb)
253 return ((struct bb_predicate *) bb->aux)->predicate;
256 /* Sets the gimplified predicate COND for basic block BB. */
258 static inline void
259 set_bb_predicate (basic_block bb, tree cond)
261 auto aux = (struct bb_predicate *) bb->aux;
262 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
263 && is_gimple_val (TREE_OPERAND (cond, 0)))
264 || is_gimple_val (cond));
265 aux->predicate = cond;
266 aux->no_predicate_stmts++;
268 if (dump_file && (dump_flags & TDF_DETAILS))
269 fprintf (dump_file, "Recording block %d value %d\n", bb->index,
270 aux->no_predicate_stmts);
273 /* Returns the sequence of statements of the gimplification of the
274 predicate for basic block BB. */
276 static inline gimple_seq
277 bb_predicate_gimplified_stmts (basic_block bb)
279 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
282 /* Sets the sequence of statements STMTS of the gimplification of the
283 predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
284 counts. */
286 static inline void
287 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
288 bool preserve_counts)
290 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
291 if (stmts == NULL && !preserve_counts)
292 ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
295 /* Adds the sequence of statements STMTS to the sequence of statements
296 of the predicate for basic block BB. */
298 static inline void
299 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
301 /* We might have updated some stmts in STMTS via force_gimple_operand
302 calling fold_stmt and that producing multiple stmts. Delink immediate
303 uses so update_ssa after loop versioning doesn't get confused for
304 the not yet inserted predicates.
305 ??? This should go away once we reliably avoid updating stmts
306 not in any BB. */
307 for (gimple_stmt_iterator gsi = gsi_start (stmts);
308 !gsi_end_p (gsi); gsi_next (&gsi))
310 gimple *stmt = gsi_stmt (gsi);
311 delink_stmt_imm_use (stmt);
312 gimple_set_modified (stmt, true);
313 ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
315 gimple_seq_add_seq_without_update
316 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
/* Return the number of statements the predicate of the basic block
   consists of.  */

static inline unsigned
get_bb_num_predicate_stmts (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
}
328 /* Initializes to TRUE the predicate of basic block BB. */
330 static inline void
331 init_bb_predicate (basic_block bb)
333 bb->aux = XNEW (struct bb_predicate);
334 set_bb_predicate_gimplified_stmts (bb, NULL, false);
335 set_bb_predicate (bb, boolean_true_node);
338 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
340 static inline void
341 release_bb_predicate (basic_block bb)
343 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
344 if (stmts)
346 /* Ensure that these stmts haven't yet been added to a bb. */
347 if (flag_checking)
348 for (gimple_stmt_iterator i = gsi_start (stmts);
349 !gsi_end_p (i); gsi_next (&i))
350 gcc_assert (! gimple_bb (gsi_stmt (i)));
352 /* Discard them. */
353 gimple_seq_discard (stmts);
354 set_bb_predicate_gimplified_stmts (bb, NULL, false);
358 /* Free the predicate of basic block BB. */
360 static inline void
361 free_bb_predicate (basic_block bb)
363 if (!bb_has_predicate (bb))
364 return;
366 release_bb_predicate (bb);
367 free (bb->aux);
368 bb->aux = NULL;
371 /* Reinitialize predicate of BB with the true predicate. */
373 static inline void
374 reset_bb_predicate (basic_block bb)
376 if (!bb_has_predicate (bb))
377 init_bb_predicate (bb);
378 else
380 release_bb_predicate (bb);
381 set_bb_predicate (bb, boolean_true_node);
385 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
386 the expression EXPR. Inserts the statement created for this
387 computation before GSI and leaves the iterator GSI at the same
388 statement. */
390 static tree
391 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
393 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
394 gimple *stmt = gimple_build_assign (new_name, expr);
395 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
396 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
397 return new_name;
400 /* Return true when COND is a false predicate. */
402 static inline bool
403 is_false_predicate (tree cond)
405 return (cond != NULL_TREE
406 && (cond == boolean_false_node
407 || integer_zerop (cond)));
410 /* Return true when COND is a true predicate. */
412 static inline bool
413 is_true_predicate (tree cond)
415 return (cond == NULL_TREE
416 || cond == boolean_true_node
417 || integer_onep (cond));
420 /* Returns true when BB has a predicate that is not trivial: true or
421 NULL_TREE. */
423 static inline bool
424 is_predicated (basic_block bb)
426 return !is_true_predicate (bb_predicate (bb));
/* Parses the predicate COND and returns its comparison code and
   operands OP0 and OP1.  Returns ERROR_MARK when COND is neither a
   comparison tree nor an SSA name defined by a comparison (possibly
   under a TRUTH_NOT_EXPR).  */

static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  /* An SSA name: inspect its defining assignment.  */
  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (s);
	  *op1 = gimple_assign_rhs2 (s);
	  return gimple_assign_rhs_code (s);
	}
      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
	{
	  /* Recurse on the negated operand and invert the resulting
	     comparison code, honoring NaNs for floating types.  */
	  tree op = gimple_assign_rhs1 (s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  /* A bare comparison expression.  */
  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  return ERROR_MARK;
}
470 /* Returns the fold of predicate C1 OR C2 at location LOC. */
472 static tree
473 fold_or_predicates (location_t loc, tree c1, tree c2)
475 tree op1a, op1b, op2a, op2b;
476 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
477 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
479 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
481 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
482 code2, op2a, op2b);
483 if (t)
484 return t;
487 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
/* Returns either a COND_EXPR or the folded expression if the folded
   expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
   a constant or a SSA_NAME. */

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }

  /* Try to simplify COND ? RHS : LHS through the gimple match-and-
     simplify machinery, following all SSA edges.  */
  gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
			 type, cond, rhs, lhs);
  if (cexpr.resimplify (NULL, follow_all_ssa_edges))
    {
      if (gimple_simplified_result_is_gimple_val (&cexpr))
	return cexpr.ops[0];
      else if (cexpr.code == ABS_EXPR)
	return build1 (ABS_EXPR, type, cexpr.ops[0]);
      else if (cexpr.code == MIN_EXPR
	       || cexpr.code == MAX_EXPR)
	return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
    }

  /* No simplification applies: build the plain COND_EXPR.  */
  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);

  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  /* OR the new condition into any predicate already recorded for BB.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  if (!is_gimple_val (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}
591 /* Add the condition COND to the previous condition PREV_COND, and add
592 this to the predicate list of the destination of edge E. LOOP is
593 the loop to be if-converted. */
595 static void
596 add_to_dst_predicate_list (class loop *loop, edge e,
597 tree prev_cond, tree cond)
599 if (!flow_bb_inside_loop_p (loop, e->dest))
600 return;
602 if (!is_true_predicate (prev_cond))
603 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
604 prev_cond, cond);
606 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
607 add_to_predicate_list (loop, e->dest, cond);
/* Return true if one of the successor edges of BB exits LOOP.  */

static bool
bb_with_exit_edge_p (const class loop *loop, basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (loop_exit_edge_p (loop, e))
      return true;

  return false;
}
/* Given PHI which has more than two arguments, this function checks if
   it's if-convertible by degenerating its arguments.  Specifically, if
   below two conditions are satisfied:

     1) Number of PHI arguments with different values equals to 2 and one
	argument has the only occurrence.
     2) The edge corresponding to the unique argument isn't critical edge.

   Such PHI can be handled as PHIs have only two arguments.  For example,
   below PHI:

     res = PHI <A_1(e1), A_1(e2), A_2(e3)>;

   can be transformed into:

     res = (predicate of e3) ? A_2 : A_1;

   Return TRUE if it is the case, FALSE otherwise.  */

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  /* Bucket the arguments into at most two distinct values, counting
     occurrences and remembering the last index of each; a third
     distinct value disqualifies the PHI.  */
  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	return false;
    }

  /* One of the two values must occur exactly once.  */
  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}
684 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
685 and it belongs to basic block BB. Note at this point, it is sure
686 that PHI is if-convertible. This function updates global variable
687 ANY_COMPLICATED_PHI if PHI is complicated. */
689 static bool
690 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
692 if (dump_file && (dump_flags & TDF_DETAILS))
694 fprintf (dump_file, "-------------------------\n");
695 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
698 if (bb != loop->header
699 && gimple_phi_num_args (phi) > 2
700 && !phi_convertible_by_degenerating_args (phi))
701 any_complicated_phi = true;
703 return true;
/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr {
  /* True when the reference is read or written unconditionally.  */
  bool rw_unconditionally;
  /* True when the reference is written unconditionally.  */
  bool w_unconditionally;
  /* True when the base object is written at least once
     unconditionally.  */
  bool written_at_least_once;

  /* OR of the predicates under which the reference is accessed.  */
  tree rw_predicate;
  /* OR of the predicates under which the reference is written.  */
  tree w_predicate;
  /* OR of the predicates under which the base object is written.  */
  tree base_w_predicate;
};

#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least
   once unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.  */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{
  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  /* Predicate of the block containing the access.  */
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* The first DR seen for a given innermost behavior becomes the
     master DR that accumulates the predicates.  */
  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  /* Writes additionally accumulate a per-base-object predicate.  */
  if (DR_IS_WRITE (a))
    {
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
/* Return TRUE if can prove the index IDX of an array reference REF is
   within array bound.  Return false otherwise.  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays that might have flexible sizes, it is not guaranteed that they
     do not extend over their declared size.  */
  if (array_ref_flexible_size_p (ref))
    return false;

  /* Analyze the index as a scalar evolution in LOOP: INIT + STEP * i.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  /* Compute the distance to whichever bound the step walks towards,
     and the magnitude of the step.  */
  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
845 /* Return TRUE if ref is a within bound array reference. */
847 static bool
848 ref_within_array_bound (gimple *stmt, tree ref)
850 class loop *loop = loop_containing_stmt (stmt);
852 gcc_assert (loop != NULL);
853 return for_each_index (&ref, idx_within_array_bound, loop);
857 /* Given a memory reference expression T, return TRUE if base object
858 it refers to is writable. The base object of a memory reference
859 is the main object being referenced, which is returned by function
860 get_base_address. */
862 static bool
863 base_object_writable (tree ref)
865 tree base_tree = get_base_address (ref);
867 return (base_tree
868 && DECL_P (base_tree)
869 && decl_binds_to_current_def_p (base_tree)
870 && !TREE_READONLY (base_tree));
/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT, same memory reference
   is read or written unconditionally at least once and the base memory
   reference is written unconditionally once.  This is to check reference
   will not write fault.  Also returns true if the memory reference is
   unconditionally read once then we are conditionally writing to memory
   which is defined as read and write and is bound to the definition
   we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  /* gimple_uid was set to the 1-based index into DRS when the DRs
     were collected.  */
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
	      || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditional write won't trap if the base is written
         to unconditionally.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return flag_store_data_races;
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return flag_store_data_races;
    }

  return false;
}
/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb predicate).  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  /* Check whether this is a load or store.  */
  tree lhs = gimple_assign_lhs (stmt);
  bool is_load;
  tree ref;
  if (gimple_store_p (stmt))
    {
      /* Only stores of plain gimple values can be masked.  */
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  /* Finally ask the target whether a masked load/store of this mode
     can be vectorized.  */
  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
/* Return true if STMT could be converted from an operation that is
   unconditional to one that is conditional on a bb predicate mask.  */

static bool
ifcvt_can_predicate (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);

  /* Predication only pays off when vectorization is enabled for this
     loop, and volatile operations must not be touched.  */
  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || gimple_has_volatile_ops (stmt))
    return false;

  /* Plain loads/stores go through the masked load/store path.  */
  if (gimple_assign_single_p (stmt))
    return ifcvt_can_use_mask_load_store (stmt);

  /* Otherwise look for a conditional internal function (IFN_COND_*)
     matching the statement's code that the target can vectorize.  */
  tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  if (!types_compatible_p (lhs_type, rhs_type))
    return false;
  internal_fn cond_fn = get_conditional_internal_fn (code);
  return (cond_fn != IFN_LAST
	  && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
}
/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constraints might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      /* A trapping statement can still be if-converted if the target
	 supports predicating it (masked load/store or IFN_COND_*).  */
      if (ifcvt_can_predicate (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }
  else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	    || POINTER_TYPE_P (TREE_TYPE (lhs)))
	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
	   && arith_code_with_undefined_signed_overflow
		(gimple_assign_rhs_code (stmt)))
    /* We have to rewrite stmts with undefined overflow.  */
    need_to_rewrite_undefined = true;

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}
1086 /* Return true when STMT is if-convertible.
1088 A statement is if-convertible if:
1089 - it is an if-convertible GIMPLE_ASSIGN,
1090 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1091 - it is builtins call,
1092 - it is a call to a function with a SIMD clone. */
1094 static bool
1095 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1097 switch (gimple_code (stmt))
/* Labels, debug statements and conditions are either removed or
   handled directly by the if-conversion machinery.  */
1099 case GIMPLE_LABEL:
1100 case GIMPLE_DEBUG:
1101 case GIMPLE_COND:
1102 return true;
1104 case GIMPLE_ASSIGN:
1105 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1107 case GIMPLE_CALL:
1109 /* There are some IFN_s that are used to replace builtins but have the
1110 same semantics. Even if MASK_CALL cannot handle them vectorable_call
1111 will insert the proper selection, so do not block conversion. */
1112 int flags = gimple_call_flags (stmt);
/* Const calls that map to a combined function are safe to execute
   speculatively under any predicate.  */
1113 if ((flags & ECF_CONST)
1114 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1115 && gimple_call_combined_fn (stmt) != CFN_LAST)
1116 return true;
1118 tree fndecl = gimple_call_fndecl (stmt);
1119 if (fndecl)
1121 /* We can vectorize some builtins and functions with SIMD
1122 "inbranch" clones. */
1123 struct cgraph_node *node = cgraph_node::get (fndecl);
1124 if (node && node->simd_clones != NULL)
1125 /* Ensure that at least one clone can be "inbranch". */
1126 for (struct cgraph_node *n = node->simd_clones; n != NULL;
1127 n = n->simdclone->next_clone)
1128 if (n->simdclone->inbranch)
/* Mark the call for predication: it will become an IFN_MASK_CALL.  */
1130 gimple_set_plf (stmt, GF_PLF_2, true);
1131 need_to_predicate = true;
1132 return true;
1136 return false;
1139 default:
1140 /* Don't know what to do with 'em so don't do anything. */
1141 if (dump_file && (dump_flags & TDF_DETAILS))
1143 fprintf (dump_file, "don't know what to do\n");
1144 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1146 return false;
1150 /* Assumes that BB has more than 1 predecessors.
1151 Returns false if at least one successor is not on critical edge
1152 and true otherwise. */
1154 static inline bool
1155 all_preds_critical_p (basic_block bb)
1157 edge e;
1158 edge_iterator ei;
1160 FOR_EACH_EDGE (e, ei, bb->preds)
1161 if (EDGE_COUNT (e->src->succs) == 1)
1162 return false;
1163 return true;
1166 /* Return true when BB is if-convertible. This routine does not check
1167 basic block's statements and phis.
1169 A basic block is not if-convertible if:
1170 - it is non-empty and it is after the exit block (in BFS order),
1171 - it is after the exit block but before the latch,
1172 - its edges are not normal.
1174 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1175 inside LOOP. */
1177 static bool
1178 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1180 edge e;
1181 edge_iterator ei;
1183 if (dump_file && (dump_flags & TDF_DETAILS))
1184 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
/* More than two successors means a multi-way branch that the
   predication machinery cannot express.  */
1186 if (EDGE_COUNT (bb->succs) > 2)
1187 return false;
/* A control-altering call (e.g. a noreturn call or setjmp) at the end
   of the block cannot be predicated.  */
1189 if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
1190 if (gimple_call_ctrl_altering_p (call))
1191 return false;
/* EXIT_BB is non-NULL once the caller has seen the exit block, so these
   checks constrain blocks that come after it in the visit order.  */
1193 if (exit_bb)
1195 if (bb != loop->latch)
1197 if (dump_file && (dump_flags & TDF_DETAILS))
1198 fprintf (dump_file, "basic block after exit bb but before latch\n");
1199 return false;
1201 else if (!empty_block_p (bb))
1203 if (dump_file && (dump_flags & TDF_DETAILS))
1204 fprintf (dump_file, "non empty basic block after exit bb\n");
1205 return false;
1207 else if (bb == loop->latch
1208 && bb != exit_bb
1209 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1211 if (dump_file && (dump_flags & TDF_DETAILS))
1212 fprintf (dump_file, "latch is not dominated by exit_block\n");
1213 return false;
1217 /* Be less adventurous and handle only normal edges. */
1218 FOR_EACH_EDGE (e, ei, bb->succs)
1219 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1221 if (dump_file && (dump_flags & TDF_DETAILS))
1222 fprintf (dump_file, "Difficult to handle edges\n");
1223 return false;
1226 return true;
1229 /* Return true when all predecessor blocks of BB are visited. The
1230 VISITED bitmap keeps track of the visited blocks. */
1232 static bool
1233 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1235 edge e;
1236 edge_iterator ei;
1237 FOR_EACH_EDGE (e, ei, bb->preds)
1238 if (!bitmap_bit_p (*visited, e->src->index))
1239 return false;
1241 return true;
1244 /* Get body of a LOOP in suitable order for if-conversion. It is
1245 caller's responsibility to deallocate basic block list.
1246 If-conversion suitable order is, breadth first sort (BFS) order
1247 with an additional constraint: select a block only if all its
1248 predecessors are already selected. */
1250 static basic_block *
1251 get_loop_body_in_if_conv_order (const class loop *loop)
1253 basic_block *blocks, *blocks_in_bfs_order;
1254 basic_block bb;
1255 bitmap visited;
1256 unsigned int index = 0;
1257 unsigned int visited_count = 0;
1259 gcc_assert (loop->num_nodes);
1260 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1262 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1263 visited = BITMAP_ALLOC (NULL);
1265 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1267 index = 0;
/* Repeatedly scan the BFS-ordered list, accepting a block only once
   all of its predecessors have been accepted.  The scan restarts from
   the beginning until all blocks are placed.  */
1268 while (index < loop->num_nodes)
1270 bb = blocks_in_bfs_order [index];
1272 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1274 free (blocks_in_bfs_order);
1275 BITMAP_FREE (visited);
1276 free (blocks);
1277 return NULL;
1280 if (!bitmap_bit_p (visited, bb->index))
/* The header has no in-loop predecessors, so it is always eligible.  */
1282 if (pred_blocks_visited_p (bb, &visited)
1283 || bb == loop->header)
1285 /* This block is now visited. */
1286 bitmap_set_bit (visited, bb->index);
1287 blocks[visited_count++] = bb;
1291 index++;
1293 if (index == loop->num_nodes
1294 && visited_count != loop->num_nodes)
1295 /* Not done yet. */
1296 index = 0;
1298 free (blocks_in_bfs_order);
1299 BITMAP_FREE (visited);
1301 /* Go through loop and reject if-conversion or lowering of bitfields if we
1302 encounter statements we do not believe the vectorizer will be able to
1303 handle. If adding a new type of statement here, make sure
1304 'ifcvt_local_dce' is also able to handle it properly. */
1305 for (index = 0; index < loop->num_nodes; index++)
1307 basic_block bb = blocks[index];
1308 gimple_stmt_iterator gsi;
1310 bool may_have_nonlocal_labels
1311 = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1312 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1313 switch (gimple_code (gsi_stmt (gsi)))
1315 case GIMPLE_LABEL:
1316 if (!may_have_nonlocal_labels)
1318 tree label
1319 = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1320 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1322 free (blocks);
1323 return NULL;
1326 /* Fallthru. */
1327 case GIMPLE_ASSIGN:
1328 case GIMPLE_CALL:
1329 case GIMPLE_DEBUG:
1330 case GIMPLE_COND:
/* Reset the UID; it is reused later to number data references.  */
1331 gimple_set_uid (gsi_stmt (gsi), 0);
1332 break;
1333 default:
1334 free (blocks);
1335 return NULL;
1338 return blocks;
1341 /* Compute the predicates for all the basic blocks in LOOP.
1344 predicate_bbs first allocates the predicates of the basic blocks.
1345 These fields are then initialized with the tree expressions
1346 representing the predicates under which a basic block is executed
1347 in the LOOP. As the loop->header is executed at each iteration, it
1348 has the "true" predicate. Other statements executed under a
1349 condition are predicated with that condition, for example
1351 | if (x)
1352 | S1;
1353 | else
1354 | S2;
1356 S1 will be predicated with "x", and
1357 S2 will be predicated with "!x". */
1359 static void
1360 predicate_bbs (loop_p loop)
1362 unsigned int i;
1364 for (i = 0; i < loop->num_nodes; i++)
1365 init_bb_predicate (ifc_bbs[i]);
1367 for (i = 0; i < loop->num_nodes; i++)
1369 basic_block bb = ifc_bbs[i];
1370 tree cond;
1372 /* The loop latch and loop exit block are always executed and
1373 have no extra conditions to be processed: skip them. */
1374 if (bb == loop->latch
1375 || bb_with_exit_edge_p (loop, bb))
1377 reset_bb_predicate (bb);
1378 continue;
1381 cond = bb_predicate (bb);
1382 if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
1384 tree c2;
1385 edge true_edge, false_edge;
1386 location_t loc = gimple_location (stmt);
1387 tree c;
1388 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1389 conditions can remain unfolded because of multiple uses so
1390 try to re-fold here, especially to get precision changing
1391 conversions sorted out. Do not simply fold the stmt since
1392 this is analysis only. When conditions were embedded in
1393 COND_EXPRs those were folded separately before folding the
1394 COND_EXPR but as they are now outside we have to make sure
1395 to fold them. Do it here - another opportunity would be to
1396 fold predicates as they are inserted. */
1397 gimple_match_op cexpr (gimple_match_cond::UNCOND,
1398 gimple_cond_code (stmt),
1399 boolean_type_node,
1400 gimple_cond_lhs (stmt),
1401 gimple_cond_rhs (stmt));
/* Use the simplified comparison only when it is still a plain
   comparison tree code; otherwise fall back to the original.  */
1402 if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1403 && cexpr.code.is_tree_code ()
1404 && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1405 c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1406 cexpr.ops[0], cexpr.ops[1]);
1407 else
1408 c = build2_loc (loc, gimple_cond_code (stmt),
1409 boolean_type_node,
1410 gimple_cond_lhs (stmt),
1411 gimple_cond_rhs (stmt));
1413 /* Add new condition into destination's predicate list. */
1414 extract_true_false_edges_from_block (gimple_bb (stmt),
1415 &true_edge, &false_edge);
1417 /* If C is true, then TRUE_EDGE is taken. */
1418 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1419 unshare_expr (c));
1421 /* If C is false, then FALSE_EDGE is taken. */
1422 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1423 unshare_expr (c));
1424 add_to_dst_predicate_list (loop, false_edge,
1425 unshare_expr (cond), c2);
1427 cond = NULL_TREE;
1430 /* If current bb has only one successor, then consider it as an
1431 unconditional goto. */
1432 if (single_succ_p (bb))
1434 basic_block bb_n = single_succ (bb);
1436 /* The successor bb inherits the predicate of its
1437 predecessor. If there is no predicate in the predecessor
1438 bb, then consider the successor bb as always executed. */
1439 if (cond == NULL_TREE)
1440 cond = boolean_true_node;
1442 add_to_predicate_list (loop, bb_n, cond);
1446 /* The loop header is always executed. */
1447 reset_bb_predicate (loop->header)
1448 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1449 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1452 /* Build region by adding loop pre-header and post-header blocks.
   The returned vector is ordered: pre-header first, then the loop
   body blocks from ifc_bbs, then the single exit (post-header) block;
   it is used to compute dominance info for exactly this region.  */
1454 static vec<basic_block>
1455 build_region (class loop *loop)
1457 vec<basic_block> region = vNULL;
1458 basic_block exit_bb = NULL;
1460 gcc_assert (ifc_bbs);
1461 /* The first element is loop pre-header. */
1462 region.safe_push (loop_preheader_edge (loop)->src);
1464 for (unsigned int i = 0; i < loop->num_nodes; i++)
1466 basic_block bb = ifc_bbs[i];
1467 region.safe_push (bb);
1468 /* Find loop postheader. */
1469 edge e;
1470 edge_iterator ei;
1471 FOR_EACH_EDGE (e, ei, bb->succs)
1472 if (loop_exit_edge_p (loop, e))
1474 exit_bb = e->dest;
1475 break;
1478 /* The last element is loop post-header. */
/* If-converted loops have exactly one exit, so EXIT_BB must be set.  */
1479 gcc_assert (exit_bb);
1480 region.safe_push (exit_bb);
1481 return region;
1484 /* Return true when LOOP is if-convertible. This is a helper function
1485 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1486 in if_convertible_loop_p. */
1488 static bool
1489 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1491 unsigned int i;
1492 basic_block exit_bb = NULL;
1493 vec<basic_block> region;
1495 calculate_dominance_info (CDI_DOMINATORS);
1497 for (i = 0; i < loop->num_nodes; i++)
1499 basic_block bb = ifc_bbs[i];
1501 if (!if_convertible_bb_p (loop, bb, exit_bb))
1502 return false;
1504 if (bb_with_exit_edge_p (loop, bb))
1505 exit_bb = bb;
1508 data_reference_p dr;
/* These maps deduplicate data references by innermost behavior and by
   base object; they feed ifcvt_memrefs_wont_trap.  */
1510 innermost_DR_map
1511 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1512 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1514 /* Compute post-dominator tree locally. */
1515 region = build_region (loop);
1516 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1518 predicate_bbs (loop);
1520 /* Free post-dominator tree since it is not used after predication. */
1521 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1522 region.release ();
1524 for (i = 0; refs->iterate (i, &dr); i++)
1526 tree ref = DR_REF (dr);
/* Attach per-DR if-conversion bookkeeping (read/write predicates).  */
1528 dr->aux = XNEW (struct ifc_dr);
1529 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1530 DR_RW_UNCONDITIONALLY (dr) = false;
1531 DR_W_UNCONDITIONALLY (dr) = false;
1532 IFC_DR (dr)->rw_predicate = boolean_false_node;
1533 IFC_DR (dr)->w_predicate = boolean_false_node;
1534 IFC_DR (dr)->base_w_predicate = boolean_false_node;
/* Give each DR-carrying statement a non-zero UID (index + 1) so the
   DR can be recovered from its statement later.  */
1535 if (gimple_uid (DR_STMT (dr)) == 0)
1536 gimple_set_uid (DR_STMT (dr), i + 1);
1538 /* If DR doesn't have innermost loop behavior or it's a compound
1539 memory reference, we synthesize its innermost loop behavior
1540 for hashing. */
1541 if (TREE_CODE (ref) == COMPONENT_REF
1542 || TREE_CODE (ref) == IMAGPART_EXPR
1543 || TREE_CODE (ref) == REALPART_EXPR
1544 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1545 || DR_INIT (dr) || DR_STEP (dr)))
1547 while (TREE_CODE (ref) == COMPONENT_REF
1548 || TREE_CODE (ref) == IMAGPART_EXPR
1549 || TREE_CODE (ref) == REALPART_EXPR)
1550 ref = TREE_OPERAND (ref, 0);
1552 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1553 DR_BASE_ADDRESS (dr) = ref;
1555 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1558 for (i = 0; i < loop->num_nodes; i++)
1560 basic_block bb = ifc_bbs[i];
1561 gimple_stmt_iterator itr;
1563 /* Check the if-convertibility of statements in predicated BBs. */
1564 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1565 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1566 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1567 return false;
1570 /* Checking PHIs needs to be done after stmts, as the fact whether there
1571 are any masked loads or stores affects the tests. */
1572 for (i = 0; i < loop->num_nodes; i++)
1574 basic_block bb = ifc_bbs[i];
1575 gphi_iterator itr;
1577 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1578 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1579 return false;
1582 if (dump_file)
1583 fprintf (dump_file, "Applying if-conversion\n");
1585 return true;
1588 /* Return true when LOOP is if-convertible.
1589 LOOP is if-convertible if:
1590 - it is innermost,
1591 - it has two or more basic blocks,
1592 - it has only one exit,
1593 - loop header is not the exit edge,
1594 - if its basic blocks and phi nodes are if convertible. */
1596 static bool
1597 if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1599 edge e;
1600 edge_iterator ei;
1601 bool res = false;
1603 /* Handle only innermost loop. */
1604 if (!loop || loop->inner)
1606 if (dump_file && (dump_flags & TDF_DETAILS))
1607 fprintf (dump_file, "not innermost loop\n");
1608 return false;
1611 /* If only one block, no need for if-conversion. */
/* num_nodes counts header plus latch, so <= 2 means a single-block body.  */
1612 if (loop->num_nodes <= 2)
1614 if (dump_file && (dump_flags & TDF_DETAILS))
1615 fprintf (dump_file, "less than 2 basic blocks\n");
1616 return false;
1619 /* If one of the loop header's edge is an exit edge then do not
1620 apply if-conversion. */
1621 FOR_EACH_EDGE (e, ei, loop->header->succs)
1622 if (loop_exit_edge_p (loop, e))
1623 return false;
1625 res = if_convertible_loop_p_1 (loop, refs);
/* The DR maps are allocated by if_convertible_loop_p_1; release them
   here regardless of the outcome.  */
1627 delete innermost_DR_map;
1628 innermost_DR_map = NULL;
1630 delete baseref_DR_map;
1631 baseref_DR_map = NULL;
1633 return res;
1636 /* Return reduc_1 if has_nop.
1638 if (...)
1639 tmp1 = (unsigned type) reduc_1;
1640 tmp2 = tmp1 + rhs2;
1641 reduc_3 = (signed type) tmp2. */
1642 static tree
1643 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1645 if (!has_nop)
1646 return op;
1648 if (TREE_CODE (op) != SSA_NAME)
1649 return NULL_TREE;
1651 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1652 if (!stmt
1653 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1654 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1655 (gimple_assign_rhs1 (stmt))))
1656 return NULL_TREE;
1658 return gimple_assign_rhs1 (stmt);
1661 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1662 which is in predicated basic block.
1663 In fact, the following PHI pattern is searching:
1664 loop-header:
1665 reduc_1 = PHI <..., reduc_2>
1667 if (...)
1668 reduc_3 = ...
1669 reduc_2 = PHI <reduc_1, reduc_3>
1671 ARG_0 and ARG_1 are correspondent PHI arguments.
1672 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1673 EXTENDED is true if PHI has > 2 arguments. */
1675 static bool
1676 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1677 tree *op0, tree *op1, bool extended, bool* has_nop,
1678 gimple **nop_reduc)
1680 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1681 gimple *stmt;
1682 gimple *header_phi = NULL;
1683 enum tree_code reduction_op;
1684 basic_block bb = gimple_bb (phi);
1685 class loop *loop = bb->loop_father;
1686 edge latch_e = loop_latch_edge (loop);
1687 imm_use_iterator imm_iter;
1688 use_operand_p use_p;
1689 edge e;
1690 edge_iterator ei;
1691 bool result = *has_nop = false;
1692 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1693 return false;
/* Identify which argument is the loop-header PHI (the running
   reduction value) and which is defined by the reduction statement.  */
1695 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1697 lhs = arg_1;
1698 header_phi = SSA_NAME_DEF_STMT (arg_0);
1699 stmt = SSA_NAME_DEF_STMT (arg_1);
1701 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1703 lhs = arg_0;
1704 header_phi = SSA_NAME_DEF_STMT (arg_1);
1705 stmt = SSA_NAME_DEF_STMT (arg_0);
1707 else
1708 return false;
1709 if (gimple_bb (header_phi) != loop->header)
1710 return false;
/* The header PHI and this PHI must form a cycle through the latch.  */
1712 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1713 return false;
1715 if (gimple_code (stmt) != GIMPLE_ASSIGN
1716 || gimple_has_volatile_ops (stmt))
1717 return false;
1719 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1720 return false;
1722 if (!is_predicated (gimple_bb (stmt)))
1723 return false;
1725 /* Check that stmt-block is predecessor of phi-block. */
1726 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1727 if (e->dest == bb)
1729 result = true;
1730 break;
1732 if (!result)
1733 return false;
1735 if (!has_single_use (lhs))
1736 return false;
1738 reduction_op = gimple_assign_rhs_code (stmt);
1740 /* Catch something like below
1742 loop-header:
1743 reduc_1 = PHI <..., reduc_2>
1745 if (...)
1746 tmp1 = (unsigned type) reduc_1;
1747 tmp2 = tmp1 + rhs2;
1748 reduc_3 = (signed type) tmp2;
1750 reduc_2 = PHI <reduc_1, reduc_3>
1752 and convert to
1754 reduc_2 = PHI <0, reduc_1>
1755 tmp1 = (unsigned type)reduc_1;
1756 ifcvt = cond_expr ? rhs2 : 0
1757 tmp2 = tmp1 +/- ifcvt;
1758 reduc_1 = (signed type)tmp2; */
1760 if (CONVERT_EXPR_CODE_P (reduction_op))
1762 lhs = gimple_assign_rhs1 (stmt);
1763 if (TREE_CODE (lhs) != SSA_NAME
1764 || !has_single_use (lhs))
1765 return false;
1767 *nop_reduc = stmt;
1768 stmt = SSA_NAME_DEF_STMT (lhs);
1769 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1770 || !is_gimple_assign (stmt))
1771 return false;
1773 *has_nop = true;
1774 reduction_op = gimple_assign_rhs_code (stmt);
/* Only these commutative/associative-friendly operations can be
   rewritten with a neutral element (see neutral_op_for_reduction).  */
1777 if (reduction_op != PLUS_EXPR
1778 && reduction_op != MINUS_EXPR
1779 && reduction_op != MULT_EXPR
1780 && reduction_op != BIT_IOR_EXPR
1781 && reduction_op != BIT_XOR_EXPR
1782 && reduction_op != BIT_AND_EXPR)
1783 return false;
1784 r_op1 = gimple_assign_rhs1 (stmt);
1785 r_op2 = gimple_assign_rhs2 (stmt);
1787 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1788 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1790 /* Make R_OP1 to hold reduction variable. */
1791 if (r_nop2 == PHI_RESULT (header_phi)
1792 && commutative_tree_code (reduction_op))
1794 std::swap (r_op1, r_op2);
1795 std::swap (r_nop1, r_nop2);
1797 else if (r_nop1 != PHI_RESULT (header_phi))
1798 return false;
1800 if (*has_nop)
1802 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1803 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1805 gimple *use_stmt = USE_STMT (use_p);
1806 if (is_gimple_debug (use_stmt))
1807 continue;
1808 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1809 continue;
1810 if (use_stmt != phi)
1811 return false;
1815 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1816 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1818 gimple *use_stmt = USE_STMT (use_p);
1819 if (is_gimple_debug (use_stmt))
1820 continue;
1821 if (use_stmt == stmt)
1822 continue;
1823 if (gimple_code (use_stmt) != GIMPLE_PHI)
1824 return false;
1827 *op0 = r_op1; *op1 = r_op2;
1828 *reduc = stmt;
1829 return true;
1832 /* Converts conditional scalar reduction into unconditional form, e.g.
1833 bb_4
1834 if (_5 != 0) goto bb_5 else goto bb_6
1835 end_bb_4
1836 bb_5
1837 res_6 = res_13 + 1;
1838 end_bb_5
1839 bb_6
1840 # res_2 = PHI <res_13(4), res_6(5)>
1841 end_bb_6
1843 will be converted into sequence
1844 _ifc__1 = _5 != 0 ? 1 : 0;
1845 res_2 = res_13 + _ifc__1;
1846 Argument SWAP tells that arguments of conditional expression should be
1847 swapped.
1848 Returns rhs of resulting PHI assignment. */
1850 static tree
1851 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1852 tree cond, tree op0, tree op1, bool swap,
1853 bool has_nop, gimple* nop_reduc)
1855 gimple_stmt_iterator stmt_it;
1856 gimple *new_assign;
1857 tree rhs;
1858 tree rhs1 = gimple_assign_rhs1 (reduc);
1859 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1860 tree c;
1861 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
/* OP_NOCHANGE is the neutral element of REDUCTION_OP (0 for +/-,
   1 for *, etc.): applying the reduction with it leaves the running
   value unchanged on iterations where the condition does not hold.  */
1862 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
1863 gimple_seq stmts = NULL;
1865 if (dump_file && (dump_flags & TDF_DETAILS))
1867 fprintf (dump_file, "Found cond scalar reduction.\n");
1868 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1871 /* Build cond expression using COND and constant operand
1872 of reduction rhs. */
1873 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1874 unshare_expr (cond),
1875 swap ? op_nochange : op1,
1876 swap ? op1 : op_nochange);
1878 /* Create assignment stmt and insert it at GSI. */
1879 new_assign = gimple_build_assign (tmp, c);
1880 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1881 /* Build rhs for unconditional increment/decrement/logic_operation. */
1882 rhs = gimple_build (&stmts, reduction_op,
1883 TREE_TYPE (rhs1), op0, tmp);
1885 if (has_nop)
/* Re-apply the stripped nop conversion and drop the original one.  */
1887 rhs = gimple_convert (&stmts,
1888 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1889 stmt_it = gsi_for_stmt (nop_reduc);
1890 gsi_remove (&stmt_it, true);
1891 release_defs (nop_reduc);
1893 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1895 /* Delete original reduction stmt. */
1896 stmt_it = gsi_for_stmt (reduc);
1897 gsi_remove (&stmt_it, true);
1898 release_defs (reduc);
1899 return rhs;
1902 /* Generate a simplified conditional. */
1904 static tree
1905 gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
1907 /* Check if the value is already live in a previous branch. This resolves
1908 nested conditionals from diamond PHI reductions. */
1909 if (TREE_CODE (cond) == SSA_NAME)
1911 gimple *stmt = SSA_NAME_DEF_STMT (cond);
1912 gassign *assign = NULL;
1913 if ((assign = as_a <gassign *> (stmt))
1914 && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
1916 tree arg1 = gimple_assign_rhs1 (assign);
1917 tree arg2 = gimple_assign_rhs2 (assign);
1918 if (cond_set.contains ({ arg1, 1 }))
1919 arg1 = boolean_true_node;
1920 else
1921 arg1 = gen_simplified_condition (arg1, cond_set);
1923 if (cond_set.contains ({ arg2, 1 }))
1924 arg2 = boolean_true_node;
1925 else
1926 arg2 = gen_simplified_condition (arg2, cond_set);
1928 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
1931 return cond;
1934 /* Structure used to track meta-data on PHI arguments used to generate
1935 most efficient comparison sequence to flatten a PHI node. */
1937 typedef struct ifcvt_arg_entry
1939 /* The PHI node argument value. */
1940 tree arg;
1942 /* The number of compares required to reach this PHI node from start of the
1943 BB being if-converted. */
1944 unsigned num_compares;
1946 /* The number of times this PHI node argument appears in the current PHI
1947 node. */
1948 unsigned occurs;
1950 /* The indices at which this PHI arg occurs inside the PHI node. */
1951 vec <int> *indexes;
1952 } ifcvt_arg_entry_t;
1954 /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
1955 as to whether the condition is inverted. */
1957 static tree
1958 gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t &arg,
1959 gimple_stmt_iterator *gsi,
1960 scalar_cond_masked_set_type &cond_set, bool *invert)
1962 int len;
1963 int i;
1964 tree cond = NULL_TREE;
1965 tree c;
1966 edge e;
1968 *invert = false;
1969 len = arg.indexes->length ();
1970 gcc_assert (len > 0);
/* OR together the predicates of every incoming edge carrying ARG.  */
1971 for (i = 0; i < len; i++)
1973 e = gimple_phi_arg_edge (phi, (*arg.indexes)[i]);
1974 c = bb_predicate (e->src);
/* A true predicate absorbs the whole disjunction.  */
1975 if (is_true_predicate (c))
1977 cond = c;
1978 break;
1980 /* If we have just a single inverted predicate, signal that and
1981 instead invert the COND_EXPR arms. */
1982 if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
1984 c = TREE_OPERAND (c, 0);
1985 *invert = true;
1988 c = gen_simplified_condition (c, cond_set);
1989 c = force_gimple_operand_gsi (gsi, unshare_expr (c),
1990 true, NULL_TREE, true, GSI_SAME_STMT);
1991 if (cond != NULL_TREE)
1993 /* Must build OR expression. */
1994 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1995 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
1996 NULL_TREE, true, GSI_SAME_STMT);
1998 else
1999 cond = c;
2001 /* Register the new possibly simplified conditional. When more than 2
2002 entries in a phi node we chain entries in the false branch, so the
2003 inverted condition is active. */
2004 scalar_cond_masked_key pred_cond ({ cond, 1 });
2005 if (!*invert)
2006 pred_cond.inverted_p = !pred_cond.inverted_p;
2007 cond_set.add (pred_cond);
2009 gcc_assert (cond != NULL_TREE);
2010 return cond;
2013 /* Create the smallest nested conditional possible. On pre-order we record
2014 which conditionals are live, and on post-order rewrite the chain by removing
2015 already active conditions.
2017 As an example we simplify:
2019 _7 = a_10 < 0;
2020 _21 = a_10 >= 0;
2021 _22 = a_10 < e_11(D);
2022 _23 = _21 & _22;
2023 _ifc__42 = _23 ? t_13 : 0;
2024 t_6 = _7 ? 1 : _ifc__42
2026 into
2028 _7 = a_10 < 0;
2029 _22 = a_10 < e_11(D);
2030 _ifc__42 = _22 ? t_13 : 0;
2031 t_6 = _7 ? 1 : _ifc__42;
2033 which produces better code. */
2035 static tree
2036 gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
2037 scalar_cond_masked_set_type &cond_set, tree type,
2038 gimple **res_stmt, tree lhs0,
2039 vec<struct ifcvt_arg_entry> &args, unsigned idx)
/* Recursion terminates past the last argument; the final argument
   needs no condition of its own (it is the fall-through value).  */
2041 if (idx == args.length ())
2042 return args[idx - 1].arg;
2044 bool invert;
2045 tree cond = gen_phi_arg_condition (phi, args[idx - 1], gsi, cond_set,
2046 &invert);
/* Build the chain for the remaining arguments first (post-order), so
   the innermost COND_EXPR is emitted before the ones that use it.  */
2047 tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt, lhs0,
2048 args, idx + 1);
2050 unsigned prev = idx;
2051 unsigned curr = prev - 1;
2052 tree arg0 = args[curr].arg;
2053 tree rhs, lhs;
/* The outermost COND_EXPR (idx == 1) writes the PHI's result LHS0;
   inner links get fresh temporaries.  */
2054 if (idx > 1)
2055 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2056 else
2057 lhs = lhs0;
2059 if (invert)
2060 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2061 arg1, arg0);
2062 else
2063 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2064 arg0, arg1);
2065 gassign *new_stmt = gimple_build_assign (lhs, rhs);
2066 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2067 update_stmt (new_stmt);
2068 *res_stmt = new_stmt;
2069 return lhs;
2072 /* When flattening a PHI node we have a choice of which conditions to test to
2073 for all the paths from the start of the dominator block of the BB with the
2074 PHI node. If the PHI node has X arguments we have to only test X - 1
2075 conditions as the last one is implicit. It does matter which conditions we
2076 test first. We should test the shortest condition first (distance here is
2077 measures in the number of logical operators in the condition) and the
2078 longest one last. This allows us to skip testing the most expensive
2079 condition. To accomplish this we need to sort the conditions. P1 and P2
2080 are sorted first based on the number of logical operations (num_compares)
2081 and then by how often they occur in the PHI node. */
2083 static int
2084 cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
2086 const ifcvt_arg_entry sval1 = *(const ifcvt_arg_entry *)p1;
2087 const ifcvt_arg_entry sval2 = *(const ifcvt_arg_entry *)p2;
2089 if (sval1.num_compares < sval2.num_compares)
2090 return -1;
2091 else if (sval1.num_compares > sval2.num_compares)
2092 return 1;
2094 if (sval1.occurs < sval2.occurs)
2095 return -1;
2096 else if (sval1.occurs > sval2.occurs)
2097 return 1;
2099 return 0;
2102 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2103 This routine can handle PHI nodes with more than two arguments.
2105 For example,
2106 S1: A = PHI <x1(1), x2(5)>
2107 is converted into,
2108 S2: A = cond ? x1 : x2;
2110 The generated code is inserted at GSI that points to the top of
2111 basic block's statement list.
2112 If PHI node has more than two arguments a chain of conditional
2113 expression is produced. */
2116 static void
2117 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
2119 gimple *new_stmt = NULL, *reduc, *nop_reduc;
2120 tree rhs, res, arg0, arg1, op0, op1, scev;
2121 tree cond;
2122 unsigned int index0;
2123 edge e;
2124 basic_block bb;
2125 unsigned int i;
2126 bool has_nop;
2128 res = gimple_phi_result (phi);
2129 if (virtual_operand_p (res))
2130 return;
2132 if ((rhs = degenerate_phi_result (phi))
2133 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
2134 res))
2135 && !chrec_contains_undetermined (scev)
2136 && scev != res
2137 && (rhs = gimple_phi_arg_def (phi, 0))))
2139 if (dump_file && (dump_flags & TDF_DETAILS))
2141 fprintf (dump_file, "Degenerate phi!\n");
2142 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
2144 new_stmt = gimple_build_assign (res, rhs);
2145 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2146 update_stmt (new_stmt);
2147 return;
2150 bb = gimple_bb (phi);
2151 /* Keep track of conditionals already seen. */
2152 scalar_cond_masked_set_type cond_set;
2153 if (EDGE_COUNT (bb->preds) == 2)
2155 /* Predicate ordinary PHI node with 2 arguments. */
2156 edge first_edge, second_edge;
2157 basic_block true_bb;
2158 first_edge = EDGE_PRED (bb, 0);
2159 second_edge = EDGE_PRED (bb, 1);
2160 cond = bb_predicate (first_edge->src);
2161 cond_set.add ({ cond, 1 });
2162 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2163 std::swap (first_edge, second_edge);
2164 if (EDGE_COUNT (first_edge->src->succs) > 1)
2166 cond = bb_predicate (second_edge->src);
2167 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2168 cond = TREE_OPERAND (cond, 0);
2169 else
2170 first_edge = second_edge;
2172 else
2173 cond = bb_predicate (first_edge->src);
2175 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2176 cond = gen_simplified_condition (cond, cond_set);
2177 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2178 NULL_TREE, true, GSI_SAME_STMT);
2179 true_bb = first_edge->src;
2180 if (EDGE_PRED (bb, 1)->src == true_bb)
2182 arg0 = gimple_phi_arg_def (phi, 1);
2183 arg1 = gimple_phi_arg_def (phi, 0);
2185 else
2187 arg0 = gimple_phi_arg_def (phi, 0);
2188 arg1 = gimple_phi_arg_def (phi, 1);
2190 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2191 &op0, &op1, false, &has_nop,
2192 &nop_reduc))
2194 /* Convert reduction stmt into vectorizable form. */
2195 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2196 true_bb != gimple_bb (reduc),
2197 has_nop, nop_reduc);
2198 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2200 else
2201 /* Build new RHS using selected condition and arguments. */
2202 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2203 arg0, arg1);
2204 new_stmt = gimple_build_assign (res, rhs);
2205 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2206 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2207 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2209 new_stmt = gsi_stmt (new_gsi);
2210 update_stmt (new_stmt);
2213 if (dump_file && (dump_flags & TDF_DETAILS))
2215 fprintf (dump_file, "new phi replacement stmt\n");
2216 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2218 return;
2221 /* Create hashmap for PHI node which contain vector of argument indexes
2222 having the same value. */
2223 bool swap = false;
2224 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2225 unsigned int num_args = gimple_phi_num_args (phi);
2226 /* Vector of different PHI argument values. */
2227 auto_vec<ifcvt_arg_entry_t> args;
2229 /* Compute phi_arg_map, determine the list of unique PHI args and the indices
2230 where they are in the PHI node. The indices will be used to determine
2231 the conditions to apply and their complexity. */
2232 for (i = 0; i < num_args; i++)
2234 tree arg;
2236 arg = gimple_phi_arg_def (phi, i);
2237 if (!phi_arg_map.get (arg))
2238 args.safe_push ({ arg, 0, 0, NULL });
2239 phi_arg_map.get_or_insert (arg).safe_push (i);
2242 /* Determine element with max number of occurrences and complexity. Looking
2243 at only number of occurrences as a measure for complexity isn't enough as
2244 all usages can be unique but the comparisons to reach the PHI node differ
2245 per branch. */
2246 for (unsigned i = 0; i < args.length (); i++)
2248 unsigned int len = 0;
2249 vec<int> *indices = phi_arg_map.get (args[i].arg);
2250 for (int index : *indices)
2252 edge e = gimple_phi_arg_edge (phi, index);
2253 len += get_bb_num_predicate_stmts (e->src);
2256 unsigned occur = indices->length ();
2257 if (dump_file && (dump_flags & TDF_DETAILS))
2258 fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
2259 args[i].num_compares = len;
2260 args[i].occurs = occur;
2261 args[i].indexes = indices;
2264 /* Sort elements based on rankings ARGS. */
2265 args.stablesort (cmp_arg_entry, NULL);
2267 /* Handle one special case when number of arguments with different values
2268 is equal 2 and one argument has the only occurrence. Such PHI can be
2269 handled as if would have only 2 arguments. */
2270 if (args.length () == 2
2271 && args[0].indexes->length () == 1)
2273 index0 = (*args[0].indexes)[0];
2274 arg0 = args[0].arg;
2275 arg1 = args[1].arg;
2276 e = gimple_phi_arg_edge (phi, index0);
2277 cond = bb_predicate (e->src);
2278 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2280 swap = true;
2281 cond = TREE_OPERAND (cond, 0);
2283 /* Gimplify the condition to a valid cond-expr conditonal operand. */
2284 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2285 NULL_TREE, true, GSI_SAME_STMT);
2286 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2287 &op0, &op1, true, &has_nop, &nop_reduc)))
2288 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2289 swap ? arg1 : arg0,
2290 swap ? arg0 : arg1);
2291 else
2293 /* Convert reduction stmt into vectorizable form. */
2294 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2295 swap,has_nop, nop_reduc);
2296 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2298 new_stmt = gimple_build_assign (res, rhs);
2299 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2300 update_stmt (new_stmt);
2302 else
2304 /* Common case. */
2305 tree type = TREE_TYPE (gimple_phi_result (phi));
2306 gen_phi_nest_statement (phi, gsi, cond_set, type, &new_stmt, res,
2307 args, 1);
2310 if (dump_file && (dump_flags & TDF_DETAILS))
2312 fprintf (dump_file, "new extended phi replacement stmt\n");
2313 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2317 /* Replaces in LOOP all the scalar phi nodes other than those in the
2318 LOOP->header block with conditional modify expressions. */
2320 static void
2321 predicate_all_scalar_phis (class loop *loop)
2323 basic_block bb;
2324 unsigned int orig_loop_num_nodes = loop->num_nodes;
2325 unsigned int i;
2327 for (i = 1; i < orig_loop_num_nodes; i++)
2329 gphi *phi;
2330 gimple_stmt_iterator gsi;
2331 gphi_iterator phi_gsi;
2332 bb = ifc_bbs[i];
2334 if (bb == loop->header)
2335 continue;
2337 phi_gsi = gsi_start_phis (bb);
2338 if (gsi_end_p (phi_gsi))
2339 continue;
2341 gsi = gsi_after_labels (bb);
2342 while (!gsi_end_p (phi_gsi))
2344 phi = phi_gsi.phi ();
2345 if (virtual_operand_p (gimple_phi_result (phi)))
2346 gsi_next (&phi_gsi);
2347 else
2349 predicate_scalar_phi (phi, &gsi);
2350 remove_phi_node (&phi_gsi, false);
2356 /* Insert in each basic block of LOOP the statements produced by the
2357 gimplification of the predicates. */
2359 static void
2360 insert_gimplified_predicates (loop_p loop)
2362 unsigned int i;
2364 for (i = 0; i < loop->num_nodes; i++)
2366 basic_block bb = ifc_bbs[i];
2367 gimple_seq stmts;
2368 if (!is_predicated (bb))
2369 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2370 if (!is_predicated (bb))
2372 /* Do not insert statements for a basic block that is not
2373 predicated. Also make sure that the predicate of the
2374 basic block is set to true. */
2375 reset_bb_predicate (bb);
2376 continue;
2379 stmts = bb_predicate_gimplified_stmts (bb);
2380 if (stmts)
2382 if (need_to_predicate)
2384 /* Insert the predicate of the BB just after the label,
2385 as the if-conversion of memory writes will use this
2386 predicate. */
2387 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2388 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2390 else
2392 /* Insert the predicate of the BB at the end of the BB
2393 as this would reduce the register pressure: the only
2394 use of this predicate will be in successor BBs. */
2395 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2397 if (gsi_end_p (gsi)
2398 || stmt_ends_bb_p (gsi_stmt (gsi)))
2399 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2400 else
2401 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2404 /* Once the sequence is code generated, set it to NULL. */
2405 set_bb_predicate_gimplified_stmts (bb, NULL, true);
2410 /* Helper function for predicate_statements. Returns index of existent
2411 mask if it was created for given SIZE and -1 otherwise. */
2413 static int
2414 mask_exists (int size, const vec<int> &vec)
2416 unsigned int ix;
2417 int v;
2418 FOR_EACH_VEC_ELT (vec, ix, v)
2419 if (v == size)
2420 return (int) ix;
2421 return -1;
2424 /* Helper function for predicate_statements. STMT is a memory read or
2425 write and it needs to be predicated by MASK. Return a statement
2426 that does so. */
2428 static gimple *
2429 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2431 gcall *new_stmt;
2433 tree lhs = gimple_assign_lhs (stmt);
2434 tree rhs = gimple_assign_rhs1 (stmt);
2435 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2436 mark_addressable (ref);
2437 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2438 true, NULL_TREE, true, GSI_SAME_STMT);
2439 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2440 get_object_alignment (ref));
2441 /* Copy points-to info if possible. */
2442 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2443 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2444 ref);
2445 if (TREE_CODE (lhs) == SSA_NAME)
2447 new_stmt
2448 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2449 ptr, mask);
2450 gimple_call_set_lhs (new_stmt, lhs);
2451 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2453 else
2455 new_stmt
2456 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2457 mask, rhs);
2458 gimple_move_vops (new_stmt, stmt);
2460 gimple_call_set_nothrow (new_stmt, true);
2461 return new_stmt;
2464 /* STMT uses OP_LHS. Check whether it is equivalent to:
2466 ... = OP_MASK ? OP_LHS : X;
2468 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2469 known to have value OP_COND. */
2471 static tree
2472 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2473 tree op_lhs)
2475 gassign *assign = dyn_cast <gassign *> (stmt);
2476 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2477 return NULL_TREE;
2479 tree use_cond = gimple_assign_rhs1 (assign);
2480 tree if_true = gimple_assign_rhs2 (assign);
2481 tree if_false = gimple_assign_rhs3 (assign);
2483 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2484 && if_true == op_lhs)
2485 return if_false;
2487 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2488 return if_true;
2490 return NULL_TREE;
2493 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2494 the set of SSA names defined earlier in STMT's block. */
2496 static bool
2497 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2498 tree value)
2500 if (is_gimple_min_invariant (value))
2501 return true;
2503 if (TREE_CODE (value) == SSA_NAME)
2505 if (SSA_NAME_IS_DEFAULT_DEF (value))
2506 return true;
2508 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2509 basic_block use_bb = gimple_bb (stmt);
2510 return (def_bb == use_bb
2511 ? ssa_names->contains (value)
2512 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2515 return false;
2518 /* Helper function for predicate_statements. STMT is a potentially-trapping
2519 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2520 has value COND. Return a statement that does so. SSA_NAMES is the set of
2521 SSA names defined earlier in STMT's block. */
2523 static gimple *
2524 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2525 hash_set<tree_ssa_name_hash> *ssa_names)
2527 tree lhs = gimple_assign_lhs (stmt);
2528 tree_code code = gimple_assign_rhs_code (stmt);
2529 unsigned int nops = gimple_num_ops (stmt);
2530 internal_fn cond_fn = get_conditional_internal_fn (code);
2532 /* Construct the arguments to the conditional internal function. */
2533 auto_vec<tree, 8> args;
2534 args.safe_grow (nops + 1, true);
2535 args[0] = mask;
2536 for (unsigned int i = 1; i < nops; ++i)
2537 args[i] = gimple_op (stmt, i);
2538 args[nops] = NULL_TREE;
2540 /* Look for uses of the result to see whether they are COND_EXPRs that can
2541 be folded into the conditional call. */
2542 imm_use_iterator imm_iter;
2543 gimple *use_stmt;
2544 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2546 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2547 if (new_else && value_available_p (stmt, ssa_names, new_else))
2549 if (!args[nops])
2550 args[nops] = new_else;
2551 if (operand_equal_p (new_else, args[nops], 0))
2553 /* We have:
2555 LHS = IFN_COND (MASK, ..., ELSE);
2556 X = MASK ? LHS : ELSE;
2558 which makes X equivalent to LHS. */
2559 tree use_lhs = gimple_assign_lhs (use_stmt);
2560 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2564 if (!args[nops])
2565 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2566 nops - 1, &args[1]);
2568 /* Create and insert the call. */
2569 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2570 gimple_call_set_lhs (new_stmt, lhs);
2571 gimple_call_set_nothrow (new_stmt, true);
2573 return new_stmt;
2576 /* Predicate each write to memory in LOOP.
2578 This function transforms control flow constructs containing memory
2579 writes of the form:
2581 | for (i = 0; i < N; i++)
2582 | if (cond)
2583 | A[i] = expr;
2585 into the following form that does not contain control flow:
2587 | for (i = 0; i < N; i++)
2588 | A[i] = cond ? expr : A[i];
2590 The original CFG looks like this:
2592 | bb_0
2593 | i = 0
2594 | end_bb_0
2596 | bb_1
2597 | if (i < N) goto bb_5 else goto bb_2
2598 | end_bb_1
2600 | bb_2
2601 | cond = some_computation;
2602 | if (cond) goto bb_3 else goto bb_4
2603 | end_bb_2
2605 | bb_3
2606 | A[i] = expr;
2607 | goto bb_4
2608 | end_bb_3
2610 | bb_4
2611 | goto bb_1
2612 | end_bb_4
2614 insert_gimplified_predicates inserts the computation of the COND
2615 expression at the beginning of the destination basic block:
2617 | bb_0
2618 | i = 0
2619 | end_bb_0
2621 | bb_1
2622 | if (i < N) goto bb_5 else goto bb_2
2623 | end_bb_1
2625 | bb_2
2626 | cond = some_computation;
2627 | if (cond) goto bb_3 else goto bb_4
2628 | end_bb_2
2630 | bb_3
2631 | cond = some_computation;
2632 | A[i] = expr;
2633 | goto bb_4
2634 | end_bb_3
2636 | bb_4
2637 | goto bb_1
2638 | end_bb_4
2640 predicate_statements is then predicating the memory write as follows:
2642 | bb_0
2643 | i = 0
2644 | end_bb_0
2646 | bb_1
2647 | if (i < N) goto bb_5 else goto bb_2
2648 | end_bb_1
2650 | bb_2
2651 | if (cond) goto bb_3 else goto bb_4
2652 | end_bb_2
2654 | bb_3
2655 | cond = some_computation;
2656 | A[i] = cond ? expr : A[i];
2657 | goto bb_4
2658 | end_bb_3
2660 | bb_4
2661 | goto bb_1
2662 | end_bb_4
2664 and finally combine_blocks removes the basic block boundaries making
2665 the loop vectorizable:
2667 | bb_0
2668 | i = 0
2669 | if (i < N) goto bb_5 else goto bb_1
2670 | end_bb_0
2672 | bb_1
2673 | cond = some_computation;
2674 | A[i] = cond ? expr : A[i];
2675 | if (i < N) goto bb_5 else goto bb_4
2676 | end_bb_1
2678 | bb_4
2679 | goto bb_1
2680 | end_bb_4
2683 static void
2684 predicate_statements (loop_p loop)
2686 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2687 auto_vec<int, 1> vect_sizes;
2688 auto_vec<tree, 1> vect_masks;
2689 hash_set<tree_ssa_name_hash> ssa_names;
2691 for (i = 1; i < orig_loop_num_nodes; i++)
2693 gimple_stmt_iterator gsi;
2694 basic_block bb = ifc_bbs[i];
2695 tree cond = bb_predicate (bb);
2696 bool swap;
2697 int index;
/* Nothing to do for blocks that execute unconditionally.  */
2699 if (is_true_predicate (cond))
2700 continue;
/* Strip a top-level negation off the predicate and remember it in
   SWAP so the then/else operands can be exchanged below instead.  */
2702 swap = false;
2703 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2705 swap = true;
2706 cond = TREE_OPERAND (cond, 0);
/* Generated masks are cached per mode bitsize, one cache per block.  */
2709 vect_sizes.truncate (0);
2710 vect_masks.truncate (0);
2712 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2714 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2715 tree lhs;
2716 if (!stmt)
/* A statement with a VDEF under a false predicate can never take
   effect; delete it outright.  */
2718 else if (is_false_predicate (cond)
2719 && gimple_vdef (stmt))
2721 unlink_stmt_vdef (stmt);
2722 gsi_remove (&gsi, true);
2723 release_defs (stmt);
2724 continue;
/* GF_PLF_2 marks assignments that must be predicated with a mask:
   either masked loads/stores or conditional internal functions.  */
2726 else if (gimple_plf (stmt, GF_PLF_2)
2727 && is_gimple_assign (stmt))
2729 tree lhs = gimple_assign_lhs (stmt);
2730 tree mask;
2731 gimple *new_stmt;
2732 gimple_seq stmts = NULL;
2733 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2734 /* We checked before setting GF_PLF_2 that an equivalent
2735 integer mode exists. */
2736 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2737 if (!vect_sizes.is_empty ()
2738 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2739 /* Use created mask. */
2740 mask = vect_masks[index];
2741 else
/* Materialize the predicate as a boolean mask statement.  */
2743 if (COMPARISON_CLASS_P (cond))
2744 mask = gimple_build (&stmts, TREE_CODE (cond),
2745 boolean_type_node,
2746 TREE_OPERAND (cond, 0),
2747 TREE_OPERAND (cond, 1));
2748 else
2749 mask = cond;
/* A negated predicate is handled by inverting the mask (XOR true).  */
2751 if (swap)
2753 tree true_val
2754 = constant_boolean_node (true, TREE_TYPE (mask));
2755 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2756 TREE_TYPE (mask), mask, true_val);
2758 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2760 /* Save mask and its size for further use. */
2761 vect_sizes.safe_push (bitsize);
2762 vect_masks.safe_push (mask);
/* Single (load/store) assignments become IFN_MASK_LOAD/STORE; other
   marked operations become conditional internal-function calls.  */
2764 if (gimple_assign_single_p (stmt))
2765 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2766 else
2767 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2769 gsi_replace (&gsi, new_stmt, true);
/* Arithmetic with undefined signed overflow has to be rewritten into
   a well-defined form before it may execute unconditionally.  */
2771 else if (((lhs = gimple_assign_lhs (stmt)), true)
2772 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2773 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2774 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2775 && arith_code_with_undefined_signed_overflow
2776 (gimple_assign_rhs_code (stmt)))
2778 gsi_remove (&gsi, true);
2779 gimple_seq stmts = rewrite_to_defined_overflow (stmt);
2780 bool first = true;
/* Splice the rewritten sequence back in at the original position.  */
2781 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
2782 !gsi_end_p (gsi2);)
2784 gassign *stmt2 = as_a <gassign *> (gsi_stmt (gsi2));
2785 gsi_remove (&gsi2, false);
2786 if (first)
2788 gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT);
2789 first = false;
2791 else
2792 gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT);
/* Any other store is predicated by rewriting it as a read-modify-write:
   A[i] = cond ? expr : A[i].  */
2795 else if (gimple_vdef (stmt))
2797 tree lhs = gimple_assign_lhs (stmt);
2798 tree rhs = gimple_assign_rhs1 (stmt);
2799 tree type = TREE_TYPE (lhs);
2801 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2802 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2803 if (swap)
2804 std::swap (lhs, rhs);
2805 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2806 NULL_TREE, true, GSI_SAME_STMT);
2807 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2808 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2809 update_stmt (stmt);
2812 if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
2813 && is_gimple_call (gsi_stmt (gsi)))
2815 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2816 This will cause the vectorizer to match the "in branch"
2817 clone variants, and serves to build the mask vector
2818 in a natural way. */
2819 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2820 tree orig_fn = gimple_call_fn (call);
2821 int orig_nargs = gimple_call_num_args (call);
2822 auto_vec<tree> args;
2823 args.safe_push (orig_fn);
2824 for (int i = 0; i < orig_nargs; i++)
2825 args.safe_push (gimple_call_arg (call, i));
2826 args.safe_push (cond);
2828 /* Replace the call with a IFN_MASK_CALL that has the extra
2829 condition parameter. */
2830 gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
2831 args);
2832 gimple_call_set_lhs (new_call, gimple_call_lhs (call));
2833 gsi_replace (&gsi, new_call, true);
/* Record SSA defs seen so far in this block for value_available_p.  */
2836 lhs = gimple_get_lhs (gsi_stmt (gsi));
2837 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2838 ssa_names.add (lhs);
2839 gsi_next (&gsi);
/* The recorded defs are only valid within one block.  */
2841 ssa_names.empty ();
2845 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2846 other than the exit and latch of the LOOP. Also resets the
2847 GIMPLE_DEBUG information. */
2849 static void
2850 remove_conditions_and_labels (loop_p loop)
2852 gimple_stmt_iterator gsi;
2853 unsigned int i;
2855 for (i = 0; i < loop->num_nodes; i++)
2857 basic_block bb = ifc_bbs[i];
2859 if (bb_with_exit_edge_p (loop, bb)
2860 || bb == loop->latch)
2861 continue;
2863 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2864 switch (gimple_code (gsi_stmt (gsi)))
2866 case GIMPLE_COND:
2867 case GIMPLE_LABEL:
2868 gsi_remove (&gsi, true);
2869 break;
2871 case GIMPLE_DEBUG:
2872 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2873 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2875 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2876 update_stmt (gsi_stmt (gsi));
2878 gsi_next (&gsi);
2879 break;
2881 default:
2882 gsi_next (&gsi);
2887 /* Combine all the basic blocks from LOOP into one or two super basic
2888 blocks. Replace PHI nodes with conditional modify expressions. */
2890 static void
2891 combine_blocks (class loop *loop)
2893 basic_block bb, exit_bb, merge_target_bb;
2894 unsigned int orig_loop_num_nodes = loop->num_nodes;
2895 unsigned int i;
2896 edge e;
2897 edge_iterator ei;
/* First predicate everything: gimplified predicates, scalar PHIs and,
   when required, the statements themselves.  */
2899 remove_conditions_and_labels (loop);
2900 insert_gimplified_predicates (loop);
2901 predicate_all_scalar_phis (loop);
2903 if (need_to_predicate || need_to_rewrite_undefined)
2904 predicate_statements (loop);
2906 /* Merge basic blocks. */
2907 exit_bb = NULL;
/* Remember which blocks were predicated so flow-sensitive info on
   their defs can be reset once they execute unconditionally.  */
2908 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2909 for (i = 0; i < orig_loop_num_nodes; i++)
2911 bb = ifc_bbs[i];
2912 predicated[i] = !is_true_predicate (bb_predicate (bb));
2913 free_bb_predicate (bb);
2914 if (bb_with_exit_edge_p (loop, bb))
/* At most one block with an exit edge is expected here.  */
2916 gcc_assert (exit_bb == NULL);
2917 exit_bb = bb;
2920 gcc_assert (exit_bb != loop->latch);
2922 merge_target_bb = loop->header;
2924 /* Get at the virtual def valid for uses starting at the first block
2925 we merge into the header. Without a virtual PHI the loop has the
2926 same virtual use on all stmts. */
2927 gphi *vphi = get_virtual_phi (loop->header);
2928 tree last_vdef = NULL_TREE;
2929 if (vphi)
2931 last_vdef = gimple_phi_result (vphi);
2932 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2933 ! gsi_end_p (gsi); gsi_next (&gsi))
2934 if (gimple_vdef (gsi_stmt (gsi)))
2935 last_vdef = gimple_vdef (gsi_stmt (gsi));
/* Move the statements of each block into the header, maintaining
   LAST_VDEF as the current virtual definition along the way.  */
2937 for (i = 1; i < orig_loop_num_nodes; i++)
2939 gimple_stmt_iterator gsi;
2940 gimple_stmt_iterator last;
2942 bb = ifc_bbs[i];
2944 if (bb == exit_bb || bb == loop->latch)
2945 continue;
2947 /* We release virtual PHIs late because we have to propagate them
2948 out using the current VUSE. The def might be the one used
2949 after the loop. */
2950 vphi = get_virtual_phi (bb)
2951 if (vphi)
2953 /* When there's just loads inside the loop a stray virtual
2954 PHI merging the uses can appear, update last_vdef from
2955 it. */
2956 if (!last_vdef)
2957 last_vdef = gimple_phi_arg_def (vphi, 0);
2958 imm_use_iterator iter;
2959 use_operand_p use_p;
2960 gimple *use_stmt;
2961 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2963 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2964 SET_USE (use_p, last_vdef);
2966 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
2967 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
2968 gsi = gsi_for_stmt (vphi);
2969 remove_phi_node (&gsi, true);
2972 /* Make stmts member of loop->header and clear range info from all stmts
2973 in BB which is now no longer executed conditional on a predicate we
2974 could have derived it from. */
2975 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2977 gimple *stmt = gsi_stmt (gsi);
2978 gimple_set_bb (stmt, merge_target_bb);
2979 /* Update virtual operands. */
2980 if (last_vdef)
2982 use_operand_p use_p = ssa_vuse_operand (stmt);
2983 if (use_p
2984 && USE_FROM_PTR (use_p) != last_vdef)
2985 SET_USE (use_p, last_vdef);
2986 if (gimple_vdef (stmt))
2987 last_vdef = gimple_vdef (stmt);
2989 else
2990 /* If this is the first load we arrive at update last_vdef
2991 so we handle stray PHIs correctly. */
2992 last_vdef = gimple_vuse (stmt);
2993 if (predicated[i])
2995 ssa_op_iter i;
2996 tree op;
2997 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2998 reset_flow_sensitive_info (op);
3002 /* Update stmt list. */
3003 last = gsi_last_bb (merge_target_bb);
3004 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
3005 set_bb_seq (bb, NULL);
3008 /* Fixup virtual operands in the exit block. */
3009 if (exit_bb
3010 && exit_bb != loop->header)
3012 /* We release virtual PHIs late because we have to propagate them
3013 out using the current VUSE. The def might be the one used
3014 after the loop. */
3015 vphi = get_virtual_phi (exit_bb);
3016 if (vphi)
3018 /* When there's just loads inside the loop a stray virtual
3019 PHI merging the uses can appear, update last_vdef from
3020 it. */
3021 if (!last_vdef)
3022 last_vdef = gimple_phi_arg_def (vphi, 0);
3023 imm_use_iterator iter;
3024 use_operand_p use_p;
3025 gimple *use_stmt;
3026 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3028 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3029 SET_USE (use_p, last_vdef);
3031 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3032 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3033 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
3034 remove_phi_node (&gsi, true);
3038 /* Now remove all the edges in the loop, except for those from the exit
3039 block and delete the blocks we elided. */
3040 for (i = 1; i < orig_loop_num_nodes; i++)
3042 bb = ifc_bbs[i];
3044 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
3046 if (e->src == exit_bb)
3047 ei_next (&ei);
3048 else
3049 remove_edge (e);
3052 for (i = 1; i < orig_loop_num_nodes; i++)
3054 bb = ifc_bbs[i];
3056 if (bb == exit_bb || bb == loop->latch)
3057 continue;
3059 delete_basic_block (bb);
3062 /* Re-connect the exit block. */
3063 if (exit_bb != NULL)
3065 if (exit_bb != loop->header)
3067 /* Connect this node to loop header. */
3068 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
3069 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
3072 /* Redirect non-exit edges to loop->latch. */
3073 FOR_EACH_EDGE (e, ei, exit_bb->succs)
3075 if (!loop_exit_edge_p (loop, e))
3076 redirect_edge_and_branch (e, loop->latch);
3078 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
3080 else
3082 /* If the loop does not have an exit, reconnect header and latch. */
3083 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
3084 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
3087 /* If possible, merge loop header to the block with the exit edge.
3088 This reduces the number of basic blocks to two, to please the
3089 vectorizer that handles only loops with two nodes. */
3090 if (exit_bb
3091 && exit_bb != loop->header)
3093 if (can_merge_blocks_p (loop->header, exit_bb))
3094 merge_blocks (loop->header, exit_bb);
3097 free (ifc_bbs);
3098 ifc_bbs = NULL;
3099 free (predicated);
3102 /* Version LOOP before if-converting it; the original loop
3103 will be if-converted, the new copy of the loop will not,
3104 and the LOOP_VECTORIZED internal call will be guarding which
3105 loop to execute. The vectorizer pass will fold this
3106 internal call into either true or false.
3108 Note that this function intentionally invalidates profile. Both edges
3109 out of LOOP_VECTORIZED must have 100% probability so the profile remains
3110 consistent after the condition is folded in the vectorizer. */
3112 static class loop *
3113 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
3115 basic_block cond_bb;
3116 tree cond = make_ssa_name (boolean_type_node);
3117 class loop *new_loop;
3118 gimple *g;
3119 gimple_stmt_iterator gsi;
3120 unsigned int save_length = 0;
/* Build "cond = IFN_LOOP_VECTORIZED (loop->num, 0)"; the second
   argument is patched to the copy's number once it exists.  */
3122 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
3123 build_int_cst (integer_type_node, loop->num),
3124 integer_zero_node);
3125 gimple_call_set_lhs (g, cond);
3127 void **saved_preds = NULL;
3128 if (any_complicated_phi || need_to_predicate)
3130 /* Save BB->aux around loop_version as that uses the same field. */
3131 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
3132 saved_preds = XALLOCAVEC (void *, save_length);
3133 for (unsigned i = 0; i < save_length; i++)
3134 saved_preds[i] = ifc_bbs[i]->aux;
3137 initialize_original_copy_tables ();
3138 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
3139 is re-merged in the vectorizer. */
3140 new_loop = loop_version (loop, cond, &cond_bb,
3141 profile_probability::always (),
3142 profile_probability::always (),
3143 profile_probability::always (),
3144 profile_probability::always (), true);
3145 free_original_copy_tables ();
/* Restore the predicates stashed in BB->aux.  */
3147 if (any_complicated_phi || need_to_predicate)
3148 for (unsigned i = 0; i < save_length; i++)
3149 ifc_bbs[i]->aux = saved_preds[i];
3151 if (new_loop == NULL)
3152 return NULL;
/* The copy is the scalar fallback: it must not be if-converted or
   vectorized again.  */
3154 new_loop->dont_vectorize = true;
3155 new_loop->force_vectorize = false;
3156 gsi = gsi_last_bb (cond_bb);
3157 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
3158 if (preds)
3159 preds->safe_push (g);
3160 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3161 update_ssa (TODO_update_ssa_no_phi);
3162 return new_loop;
3165 /* Return true when LOOP satisfies the follow conditions that will
3166 allow it to be recognized by the vectorizer for outer-loop
3167 vectorization:
3168 - The loop is not the root node of the loop tree.
3169 - The loop has exactly one inner loop.
3170 - The loop has a single exit.
3171 - The loop header has a single successor, which is the inner
3172 loop header.
3173 - Each of the inner and outer loop latches have a single
3174 predecessor.
3175 - The loop exit block has a single predecessor, which is the
3176 inner loop's exit block. */
3178 static bool
3179 versionable_outer_loop_p (class loop *loop)
3181 if (!loop_outer (loop)
3182 || loop->dont_vectorize
3183 || !loop->inner
3184 || loop->inner->next
3185 || !single_exit (loop)
3186 || !single_succ_p (loop->header)
3187 || single_succ (loop->header) != loop->inner->header
3188 || !single_pred_p (loop->latch)
3189 || !single_pred_p (loop->inner->latch))
3190 return false;
3192 basic_block outer_exit = single_pred (loop->latch);
3193 basic_block inner_exit = single_pred (loop->inner->latch);
3195 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3196 return false;
3198 if (dump_file)
3199 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3201 return true;
3204 /* Performs splitting of critical edges. Skip splitting and return false
3205 if LOOP will not be converted because:
3207 - LOOP is not well formed.
3208 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3210 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3212 static bool
3213 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3215 basic_block *body;
3216 basic_block bb;
3217 unsigned int num = loop->num_nodes;
3218 unsigned int i;
3219 edge e;
3220 edge_iterator ei;
3221 auto_vec<edge> critical_edges;
3223 /* Loop is not well formed. */
3224 if (loop->inner)
3225 return false;
/* Collect the critical edges of the loop; splitting is deferred so
   the traversal of BODY is not disturbed.  */
3227 body = get_loop_body (loop);
3228 for (i = 0; i < num; i++)
3230 bb = body[i];
/* Bail out on overly complicated PHIs unless -ftree-loop-if-convert
   was requested aggressively.  */
3231 if (!aggressive_if_conv
3232 && phi_nodes (bb)
3233 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3235 if (dump_file && (dump_flags & TDF_DETAILS))
3236 fprintf (dump_file,
3237 "BB %d has complicated PHI with more than %u args.\n",
3238 bb->index, MAX_PHI_ARG_NUM);
3240 free (body);
3241 return false;
3243 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3244 continue;
3246 /* Skip basic blocks not ending with conditional branch. */
3247 if (!safe_is_a <gcond *> (*gsi_last_bb (bb)))
3248 continue;
/* Only critical edges whose destination stays in this loop matter.  */
3250 FOR_EACH_EDGE (e, ei, bb->succs)
3251 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3252 critical_edges.safe_push (e);
3254 free (body);
/* Now split the collected edges.  */
3256 while (critical_edges.length () > 0)
3258 e = critical_edges.pop ();
3259 /* Don't split if bb can be predicated along non-critical edge. */
3260 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3261 split_edge (e);
3264 return true;
/* Delete redundant statements produced by predication which prevents
   loop vectorization.

   Runs a mark-and-sweep DCE restricted to the single merged basic block
   the loop consists of after combine_blocks, using the GF_PLF_2 plane
   flag as the "live" mark.  Also removes stores classified as dead by
   the DSE machinery.  */

static void
ifcvt_local_dce (class loop *loop)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* The loop has a single BB only.  */
  basic_block bb = loop->header;
  tree latch_vdef = NULL_TREE;

  worklist.create (64);
  /* Consider all phi as live statements.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      gimple_set_plf (phi, GF_PLF_2, true);
      worklist.safe_push (phi);
      /* Remember the VDEF flowing into the latch; it is the reference
	 point for dead-store classification below.  */
      if (virtual_operand_p (gimple_phi_result (phi)))
	latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	{
	  /* Debug stmts are kept but do not seed liveness propagation.  */
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  continue;
	}
      if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      code = gimple_code (stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL)
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      /* Everything else starts out dead ...  */
      gimple_set_plf (stmt, GF_PLF_2, false);

      /* ... unless its result is used outside this block, in which case
	 it must be preserved.  */
      if (code == GIMPLE_ASSIGN)
	{
	  tree lhs = gimple_assign_lhs (stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      stmt1 = USE_STMT (use_p);
	      if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
		{
		  gimple_set_plf (stmt, GF_PLF_2, true);
		  worklist.safe_push (stmt);
		  break;
		}
	    }
	}
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) != SSA_NAME)
	    continue;
	  stmt1 = SSA_NAME_DEF_STMT (use);
	  /* Only defs in this block participate; skip already-live ones.  */
	  if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
	    continue;
	  gimple_set_plf (stmt1, GF_PLF_2, true);
	  worklist.safe_push (stmt1);
	}
    }
  /* Delete dead statements.  Walk backwards so uses are removed before
     their defs.  */
  gsi = gsi_last_bb (bb);
  while (!gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsiprev = gsi;
      gsi_prev (&gsiprev);
      stmt = gsi_stmt (gsi);
      if (gimple_store_p (stmt) && gimple_vdef (stmt))
	{
	  /* Stores were marked live above; instead query DSE whether the
	     store is dead relative to the latch VDEF.  */
	  tree lhs = gimple_get_lhs (stmt);
	  ao_ref write;
	  ao_ref_init (&write, lhs);

	  if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
	      == DSE_STORE_DEAD)
	    delete_dead_or_redundant_assignment (&gsi, "dead");
	  gsi = gsiprev;
	  continue;
	}

      if (gimple_plf (stmt, GF_PLF_2))
	{
	  gsi = gsiprev;
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      gsi_remove (&gsi, true);
      release_defs (stmt);
      gsi = gsiprev;
    }
}
3391 /* Return true if VALUE is already available on edge PE. */
3393 static bool
3394 ifcvt_available_on_edge_p (edge pe, tree value)
3396 if (is_gimple_min_invariant (value))
3397 return true;
3399 if (TREE_CODE (value) == SSA_NAME)
3401 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3402 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3403 return true;
3406 return false;
/* Return true if STMT can be hoisted from if-converted loop LOOP to
   edge PE.

   Only plain assignments and calls are candidates; masked internal
   calls and COND_EXPR assignments embed the loop predicate and must
   stay.  The statement must be free of side effects, traps, EH and
   memory accesses, and all its arguments must be available at PE.  */

static bool
ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
{
  if (auto *call = dyn_cast<gcall *> (stmt))
    {
      /* Masked internal functions (loads/stores/IFN with a mask operand)
	 depend on the predicate and cannot be hoisted.  */
      if (gimple_call_internal_p (call)
	  && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
	return false;
    }
  else if (auto *assign = dyn_cast<gassign *> (stmt))
    {
      /* COND_EXPRs materialize the predicate itself.  */
      if (gimple_assign_rhs_code (assign) == COND_EXPR)
	return false;
    }
  else
    return false;

  /* Hoisting must not change observable behavior nor move memory
     accesses.  */
  if (gimple_has_side_effects (stmt)
      || gimple_could_trap_p (stmt)
      || stmt_could_throw_p (cfun, stmt)
      || gimple_vdef (stmt)
      || gimple_vuse (stmt))
    return false;

  int num_args = gimple_num_args (stmt);
  if (pe != loop_preheader_edge (loop))
    {
      /* Hoisting to an edge other than the preheader (e.g. before the
	 versioning check): require each argument to dominate PE.  */
      for (int i = 0; i < num_args; ++i)
	if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
	  return false;
    }
  else
    {
      /* Hoisting to the preheader: loop invariance suffices.  */
      for (int i = 0; i < num_args; ++i)
	if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
	  return false;
    }

  return true;
}
/* Hoist invariant statements from LOOP to edge PE.

   Scans every statement of every block; each hoistable statement (see
   ifcvt_can_hoist) is unlinked from the loop body and inserted on PE,
   preserving the original relative order of the hoisted statements.  */

static void
ifcvt_hoist_invariants (class loop *loop, edge pe)
{
  gimple_stmt_iterator hoist_gsi = {};
  unsigned int num_blocks = loop->num_nodes;
  basic_block *body = get_loop_body (loop);
  for (unsigned int i = 0; i < num_blocks; ++i)
    for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (ifcvt_can_hoist (loop, pe, stmt))
	  {
	    /* Once we've hoisted one statement, insert other statements
	       after it.  */
	    /* gsi_remove advances GSI, so no gsi_next on this path.  */
	    gsi_remove (&gsi, false);
	    if (hoist_gsi.ptr)
	      gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
	    else
	      {
		/* First hoisted statement: splitting/insertion on the edge
		   gives us the anchor for subsequent insertions.  */
		gsi_insert_on_edge_immediate (pe, stmt);
		hoist_gsi = gsi_for_stmt (stmt);
	      }
	    continue;
	  }
	gsi_next (&gsi);
      }
  free (body);
}
/* Return the DECL_BIT_FIELD_REPRESENTATIVE of the bitfield accessed in
   STMT (the LHS COMPONENT_REF if WRITE, else the RHS one), or NULL_TREE
   if the access cannot be lowered.  If BITPOS is not NULL it is set to
   a bitsizetype tree giving the bit offset of the bitfield within the
   representative.  If STRUCT_EXPR is not NULL it is set to the tree
   representing the base struct of this bitfield.  */

static tree
get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
		  tree *struct_expr)
{
  tree comp_ref = write ? gimple_assign_lhs (stmt)
			: gimple_assign_rhs1 (stmt);

  tree field_decl = TREE_OPERAND (comp_ref, 1);
  tree ref_offset = component_ref_field_offset (comp_ref);
  tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);

  /* Bail out if the representative is not a suitable type for a scalar
     register variable.  */
  if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
    return NULL_TREE;

  /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
     precision.  */
  unsigned HOST_WIDE_INT bf_prec
    = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
  if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
    return NULL_TREE;

  /* Both offsets must be compile-time constants so that *BITPOS below
     folds to a constant.  */
  if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST
      || TREE_CODE (ref_offset) != INTEGER_CST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\t Bitfield NOT OK to lower,"
			    " offset is non-constant.\n");
      return NULL_TREE;
    }

  if (struct_expr)
    *struct_expr = TREE_OPERAND (comp_ref, 0);

  if (bitpos)
    {
      /* To calculate the bitposition of the BITFIELD_REF we have to determine
	 where our bitfield starts in relation to the container REP_DECL.  The
	 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
	 us how many bytes from the start of the structure there are until the
	 start of the group of bitfield members the FIELD_DECL belongs to,
	 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
	 position our actual bitfield member starts.  For the container
	 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
	 us the distance between the start of the structure and the start of
	 the container, though the first is in bytes and the later other in
	 bits.  With this in mind we calculate the bit position of our new
	 BITFIELD_REF by subtracting the number of bits between the start of
	 the structure and the container from the number of bits from the start
	 of the structure and the actual bitfield member.  */
      tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
				 ref_offset,
				 build_int_cst (bitsizetype, BITS_PER_UNIT));
      bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
			    DECL_FIELD_BIT_OFFSET (field_decl));
      tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
				  DECL_FIELD_OFFSET (rep_decl),
				  build_int_cst (bitsizetype, BITS_PER_UNIT));
      rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
			     DECL_FIELD_BIT_OFFSET (rep_decl));

      *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
    }

  return rep_decl;
}
/* Lowers the bitfield access in STMT (a write if WRITE, else a read).
   For a write like:

   struct.bf = _1;

   lower to:

   __ifc_1 = struct.<representative>;
   __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
   struct.<representative> = __ifc_2;

   For a read:

   _1 = struct.bf;

   lower to:

   __ifc_1 = struct.<representative>;
   _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);

   where representative is a legal load that contains the bitfield value,
   bitsize is the size of the bitfield and bitpos the offset to the start of
   the bitfield within the representative.

   STMT must have been validated by get_bitfield_rep (via
   bitfields_to_lower_p) so that rep_decl below is non-NULL.  The
   original statement is removed after the replacement sequence has
   been inserted.  */

static void
lower_bitfield (gassign *stmt, bool write)
{
  tree struct_expr;
  tree bitpos;
  tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
  tree rep_type = TREE_TYPE (rep_decl);
  tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Lowering:\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, "to:\n");
    }

  /* REP_COMP_REF is a COMPONENT_REF for the representative.  NEW_VAL is it's
     defining SSA_NAME.  */
  tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
			      NULL_TREE);
  tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);

  if (dump_file && (dump_flags & TDF_DETAILS))
    print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

  if (write)
    {
      /* Insert the new value into the loaded representative, then store
	 the whole representative back.  */
      new_val = ifc_temp_var (rep_type,
			      build3 (BIT_INSERT_EXPR, rep_type, new_val,
				      unshare_expr (gimple_assign_rhs1 (stmt)),
				      bitpos), &gsi);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

      gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
					      new_val);
      /* The new store takes over the virtual operands of STMT.  */
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
  else
    {
      /* Extract the bitfield from the loaded representative.  */
      tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
			 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
			 bitpos);
      new_val = ifc_temp_var (bf_type, bfr, &gsi);

      gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
					      new_val);
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }

  gsi_remove (&gsi, true);
}
3646 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3647 with data structures representing these bitfields. */
3649 static bool
3650 bitfields_to_lower_p (class loop *loop,
3651 vec <gassign *> &reads_to_lower,
3652 vec <gassign *> &writes_to_lower)
3654 gimple_stmt_iterator gsi;
3656 if (dump_file && (dump_flags & TDF_DETAILS))
3658 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3661 for (unsigned i = 0; i < loop->num_nodes; ++i)
3663 basic_block bb = ifc_bbs[i];
3664 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3666 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3667 if (!stmt)
3668 continue;
3670 tree op = gimple_assign_lhs (stmt);
3671 bool write = TREE_CODE (op) == COMPONENT_REF;
3673 if (!write)
3674 op = gimple_assign_rhs1 (stmt);
3676 if (TREE_CODE (op) != COMPONENT_REF)
3677 continue;
3679 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3681 if (dump_file && (dump_flags & TDF_DETAILS))
3682 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3684 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3686 if (dump_file && (dump_flags & TDF_DETAILS))
3687 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3688 " field type is not Integral.\n");
3689 return false;
3692 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3694 if (dump_file && (dump_flags & TDF_DETAILS))
3695 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3696 " representative is BLKmode.\n");
3697 return false;
3700 if (dump_file && (dump_flags & TDF_DETAILS))
3701 fprintf (dump_file, "\tBitfield OK to lower.\n");
3702 if (write)
3703 writes_to_lower.safe_push (stmt);
3704 else
3705 reads_to_lower.safe_push (stmt);
3709 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.

   PREDS collects the .LOOP_VECTORIZED internal calls created when
   versioning, so the caller can fold them away if the if-converted
   loop later vanishes.  When an outer loop was versioned instead of
   LOOP itself, the routine loops back (via RLOOP/again) to also
   process the inner loop of the non-vectorized copy.  */

unsigned int
tree_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  class loop *rloop;
  auto_vec <gassign *, 4> reads_to_lower;
  auto_vec <gassign *, 4> writes_to_lower;
  bitmap exit_bbs;
  edge pe;
  auto_vec<data_reference_p, 10> refs;

 again:
  /* Reset per-loop global state; we may come here a second time for
     the inner loop of a versioned outer loop.  */
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_lower_bitfields = false;
  need_to_ifcvt = false;
  need_to_predicate = false;
  need_to_rewrite_undefined = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      class loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  /* If there are more than two BBs in the loop then there is at least one if
     to convert.  */
  if (loop->num_nodes > 2
      && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      goto cleanup;
    }

  if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
    goto cleanup;

  if (loop->num_nodes > 2)
    {
      /* More than one loop exit is too much to handle.  */
      if (!single_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
	}
      else
	{
	  need_to_ifcvt = true;

	  if (!if_convertible_loop_p (loop, &refs)
	      || !dbg_cnt (if_conversion_tree))
	    goto cleanup;

	  /* Predication / complicated PHIs only pay off when the result
	     will be vectorized.  */
	  if ((need_to_predicate || any_complicated_phi)
	      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
		  || loop->dont_vectorize))
	    goto cleanup;
	}
    }

  /* Bitfield lowering is only done in service of vectorization.  */
  if ((flag_tree_loop_vectorize || loop->force_vectorize)
      && !loop->dont_vectorize)
    need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
						    writes_to_lower);

  if (!need_to_ifcvt && !need_to_lower_bitfields)
    goto cleanup;

  /* The edge to insert invariant stmts on.  */
  pe = loop_preheader_edge (loop);

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_lower_bitfields
      || need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      class loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      class loop *nloop = version_loop_for_if_conversion (vloop, preds);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
      else
	/* If we versioned loop then make sure to insert invariant
	   stmts before the .LOOP_VECTORIZED check since the vectorizer
	   will re-use that for things like runtime alias versioning
	   whose condition can end up using those invariants.  */
	pe = single_pred_edge (gimple_bb (preds->last ()));
    }

  if (need_to_lower_bitfields)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "-------------------------\n");
	  fprintf (dump_file, "Start lowering bitfields\n");
	}
      while (!reads_to_lower.is_empty ())
	lower_bitfield (reads_to_lower.pop (), false);
      while (!writes_to_lower.is_empty ())
	lower_bitfield (writes_to_lower.pop (), true);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Done lowering bitfields\n");
	  fprintf (dump_file, "-------------------------\n");
	}
    }

  if (need_to_ifcvt)
    {
      /* Before we rewrite edges we'll record their original position in the
	 edge map such that we can map the edges between the ifcvt and the
	 non-ifcvt loop during peeling.  */
      uintptr_t idx = 0;
      for (edge exit : get_loop_exit_edges (loop))
	exit->aux = (void*)idx++;

      /* Now all statements are if-convertible.  Combine all the basic
	 blocks into one huge basic block doing the if-conversion
	 on-the-fly.  */
      combine_blocks (loop);
    }

  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
     and stores are involved.  CSE only the loop body, not the entry
     PHIs, those are to be kept in sync with the non-if-converted copy.
     ???  We'll still keep dead stores though.  */
  exit_bbs = BITMAP_ALLOC (NULL);
  for (edge exit : get_loop_exit_edges (loop))
    bitmap_set_bit (exit_bbs, exit->dest->index);
  bitmap_set_bit (exit_bbs, loop->latch->index);

  std::pair <tree, tree> *name_pair;
  unsigned ssa_names_idx;
  FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop);
  BITMAP_FREE (exit_bbs);

  ifcvt_hoist_invariants (loop, pe);

  todo |= TODO_cleanup_cfg;

 cleanup:
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    {
      free (dr->aux);
      free_data_ref (dr);
    }
  refs.truncate (0);

  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  /* An outer loop was versioned: restart to process the inner loop of
     the non-vectorized copy as well.  */
  if (rloop != NULL)
    {
      loop = rloop;
      reads_to_lower.truncate (0);
      writes_to_lower.truncate (0);
      goto again;
    }

  return todo;
}
3935 /* Tree if-conversion pass management. */
3937 namespace {
/* Pass descriptor for the "ifcvt" GIMPLE pass; requires CFG and SSA,
   provides/destroys nothing and sets no todo flags itself.  */
const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* The tree if-conversion pass object; gate and execute are defined
   out-of-line below.  */
class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override;

}; // class pass_if_conversion
3965 bool
3966 pass_if_conversion::gate (function *fun)
3968 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3969 && flag_tree_loop_if_convert != 0)
3970 || flag_tree_loop_if_convert == 1);
/* Main entry of the pass: if-convert every eligible loop in FUN,
   then perform the deferred IL cleanup and drop .LOOP_VECTORIZED
   calls whose if-converted loop no longer exists.  */

unsigned int
pass_if_conversion::execute (function *fun)
{
  unsigned todo = 0;

  if (number_of_loops (fun) <= 1)
    return 0;

  auto_vec<gimple *> preds;
  for (auto loop : loops_list (cfun, 0))
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop, &preds);

  if (todo)
    {
      /* If-conversion changed loop bodies; cached iteration counts and
	 SCEV results are stale.  */
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  if (flag_checking)
    {
      /* tree_if_conversion uses bb->aux temporarily; verify nothing
	 leaked.  */
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  /* Perform IL update now, it might elide some loops.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      if (need_ssa_update_p (fun))
	todo |= TODO_update_ssa;
    }
  if (todo & TODO_update_ssa_any)
    update_ssa (todo & TODO_update_ssa_any);

  /* If if-conversion elided the loop fall back to the original one.  */
  for (unsigned i = 0; i < preds.length (); ++i)
    {
      gimple *g = preds[i];
      if (!gimple_bb (g))
	continue;
      unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
      unsigned orig_loop = tree_to_uhwi (gimple_call_arg (g, 1));
      if (!get_loop (fun, ifcvt_loop) || !get_loop (fun, orig_loop))
	{
	  if (dump_file)
	    fprintf (dump_file, "If-converted loop vanished\n");
	  /* Resolve the versioning condition to "take the original".  */
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }

  return 0;
}
4030 } // anon namespace
/* Factory used by the pass manager to instantiate the pass.  */

gimple_opt_pass *
make_pass_if_conversion (gcc::context *ctxt)
{
  return new pass_if_conversion (ctxt);
}