RISC-V: Support IMM for operand 1 of ussub pattern
[official-gcc.git] / gcc / tree-if-conv.cc
blob735e47763de1ee1f1276d5e2615d486f2ac717ca
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2024 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "expr.h"
95 #include "optabs-tree.h"
96 #include "gimple-pretty-print.h"
97 #include "alias.h"
98 #include "fold-const.h"
99 #include "stor-layout.h"
100 #include "gimple-iterator.h"
101 #include "gimple-fold.h"
102 #include "gimplify.h"
103 #include "gimplify-me.h"
104 #include "tree-cfg.h"
105 #include "tree-into-ssa.h"
106 #include "tree-ssa.h"
107 #include "cfgloop.h"
108 #include "tree-data-ref.h"
109 #include "tree-scalar-evolution.h"
110 #include "tree-ssa-loop.h"
111 #include "tree-ssa-loop-niter.h"
112 #include "tree-ssa-loop-ivopts.h"
113 #include "tree-ssa-address.h"
114 #include "dbgcnt.h"
115 #include "tree-hash-traits.h"
116 #include "varasm.h"
117 #include "builtins.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124 #include "tree-vectorizer.h"
125 #include "tree-eh.h"
126 #include "cgraph.h"
128 /* For lang_hooks.types.type_for_mode. */
129 #include "langhooks.h"
/* Only handle PHIs with no more arguments unless we are asked to by
   simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any ifcvting to be done.  */
static bool need_to_ifcvt;
159 /* Hash for struct innermost_loop_behavior. It depends on the user to
160 free the memory. */
162 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
164 static inline hashval_t hash (const value_type &);
165 static inline bool equal (const value_type &,
166 const compare_type &);
169 inline hashval_t
170 innermost_loop_behavior_hash::hash (const value_type &e)
172 hashval_t hash;
174 hash = iterative_hash_expr (e->base_address, 0);
175 hash = iterative_hash_expr (e->offset, hash);
176 hash = iterative_hash_expr (e->init, hash);
177 return iterative_hash_expr (e->step, hash);
180 inline bool
181 innermost_loop_behavior_hash::equal (const value_type &e1,
182 const compare_type &e2)
184 if ((e1->base_address && !e2->base_address)
185 || (!e1->base_address && e2->base_address)
186 || (!e1->offset && e2->offset)
187 || (e1->offset && !e2->offset)
188 || (!e1->init && e2->init)
189 || (e1->init && !e2->init)
190 || (!e1->step && e2->step)
191 || (e1->step && !e2->step))
192 return false;
194 if (e1->base_address && e2->base_address
195 && !operand_equal_p (e1->base_address, e2->base_address, 0))
196 return false;
197 if (e1->offset && e2->offset
198 && !operand_equal_p (e1->offset, e2->offset, 0))
199 return false;
200 if (e1->init && e2->init
201 && !operand_equal_p (e1->init, e2->init, 0))
202 return false;
203 if (e1->step && e2->step
204 && !operand_equal_p (e1->step, e2->step, 0))
205 return false;
207 return true;
210 /* List of basic blocks in if-conversion-suitable order. */
211 static basic_block *ifc_bbs;
213 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
214 static hash_map<innermost_loop_behavior_hash,
215 data_reference_p> *innermost_DR_map;
217 /* Hash table to store <base reference, DR> pairs. */
218 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
220 /* List of redundant SSA names: the first should be replaced by the second. */
221 static vec< std::pair<tree, tree> > redundant_ssa_names;
223 /* Structure used to predicate basic blocks. This is attached to the
224 ->aux field of the BBs in the loop to be if-converted. */
225 struct bb_predicate {
227 /* The condition under which this basic block is executed. */
228 tree predicate;
230 /* PREDICATE is gimplified, and the sequence of statements is
231 recorded here, in order to avoid the duplication of computations
232 that occur in previous conditions. See PR44483. */
233 gimple_seq predicate_gimplified_stmts;
235 /* Records the number of statements recorded into
236 PREDICATE_GIMPLIFIED_STMTS. */
237 unsigned no_predicate_stmts;
240 /* Returns true when the basic block BB has a predicate. */
242 static inline bool
243 bb_has_predicate (basic_block bb)
245 return bb->aux != NULL;
248 /* Returns the gimplified predicate for basic block BB. */
250 static inline tree
251 bb_predicate (basic_block bb)
253 return ((struct bb_predicate *) bb->aux)->predicate;
256 /* Sets the gimplified predicate COND for basic block BB. */
258 static inline void
259 set_bb_predicate (basic_block bb, tree cond)
261 auto aux = (struct bb_predicate *) bb->aux;
262 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
263 && is_gimple_val (TREE_OPERAND (cond, 0)))
264 || is_gimple_val (cond));
265 aux->predicate = cond;
266 aux->no_predicate_stmts++;
268 if (dump_file && (dump_flags & TDF_DETAILS))
269 fprintf (dump_file, "Recording block %d value %d\n", bb->index,
270 aux->no_predicate_stmts);
273 /* Returns the sequence of statements of the gimplification of the
274 predicate for basic block BB. */
276 static inline gimple_seq
277 bb_predicate_gimplified_stmts (basic_block bb)
279 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
282 /* Sets the sequence of statements STMTS of the gimplification of the
283 predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
284 counts. */
286 static inline void
287 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
288 bool preserve_counts)
290 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
291 if (stmts == NULL && !preserve_counts)
292 ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
295 /* Adds the sequence of statements STMTS to the sequence of statements
296 of the predicate for basic block BB. */
298 static inline void
299 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
301 /* We might have updated some stmts in STMTS via force_gimple_operand
302 calling fold_stmt and that producing multiple stmts. Delink immediate
303 uses so update_ssa after loop versioning doesn't get confused for
304 the not yet inserted predicates.
305 ??? This should go away once we reliably avoid updating stmts
306 not in any BB. */
307 for (gimple_stmt_iterator gsi = gsi_start (stmts);
308 !gsi_end_p (gsi); gsi_next (&gsi))
310 gimple *stmt = gsi_stmt (gsi);
311 delink_stmt_imm_use (stmt);
312 gimple_set_modified (stmt, true);
313 ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
315 gimple_seq_add_seq_without_update
316 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
319 /* Return the number of statements the predicate of the basic block consists
320 of. */
322 static inline unsigned
323 get_bb_num_predicate_stmts (basic_block bb)
325 return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
328 /* Initializes to TRUE the predicate of basic block BB. */
330 static inline void
331 init_bb_predicate (basic_block bb)
333 bb->aux = XNEW (struct bb_predicate);
334 set_bb_predicate_gimplified_stmts (bb, NULL, false);
335 set_bb_predicate (bb, boolean_true_node);
338 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
340 static inline void
341 release_bb_predicate (basic_block bb)
343 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
344 if (stmts)
346 /* Ensure that these stmts haven't yet been added to a bb. */
347 if (flag_checking)
348 for (gimple_stmt_iterator i = gsi_start (stmts);
349 !gsi_end_p (i); gsi_next (&i))
350 gcc_assert (! gimple_bb (gsi_stmt (i)));
352 /* Discard them. */
353 gimple_seq_discard (stmts);
354 set_bb_predicate_gimplified_stmts (bb, NULL, false);
358 /* Free the predicate of basic block BB. */
360 static inline void
361 free_bb_predicate (basic_block bb)
363 if (!bb_has_predicate (bb))
364 return;
366 release_bb_predicate (bb);
367 free (bb->aux);
368 bb->aux = NULL;
371 /* Reinitialize predicate of BB with the true predicate. */
373 static inline void
374 reset_bb_predicate (basic_block bb)
376 if (!bb_has_predicate (bb))
377 init_bb_predicate (bb);
378 else
380 release_bb_predicate (bb);
381 set_bb_predicate (bb, boolean_true_node);
385 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
386 the expression EXPR. Inserts the statement created for this
387 computation before GSI and leaves the iterator GSI at the same
388 statement. */
390 static tree
391 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
393 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
394 gimple *stmt = gimple_build_assign (new_name, expr);
395 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
396 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
397 return new_name;
400 /* Return true when COND is a false predicate. */
402 static inline bool
403 is_false_predicate (tree cond)
405 return (cond != NULL_TREE
406 && (cond == boolean_false_node
407 || integer_zerop (cond)));
410 /* Return true when COND is a true predicate. */
412 static inline bool
413 is_true_predicate (tree cond)
415 return (cond == NULL_TREE
416 || cond == boolean_true_node
417 || integer_onep (cond));
420 /* Returns true when BB has a predicate that is not trivial: true or
421 NULL_TREE. */
423 static inline bool
424 is_predicated (basic_block bb)
426 return !is_true_predicate (bb_predicate (bb));
429 /* Parses the predicate COND and returns its comparison code and
430 operands OP0 and OP1. */
432 static enum tree_code
433 parse_predicate (tree cond, tree *op0, tree *op1)
435 gimple *s;
437 if (TREE_CODE (cond) == SSA_NAME
438 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
440 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
442 *op0 = gimple_assign_rhs1 (s);
443 *op1 = gimple_assign_rhs2 (s);
444 return gimple_assign_rhs_code (s);
447 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
449 tree op = gimple_assign_rhs1 (s);
450 tree type = TREE_TYPE (op);
451 enum tree_code code = parse_predicate (op, op0, op1);
453 return code == ERROR_MARK ? ERROR_MARK
454 : invert_tree_comparison (code, HONOR_NANS (type));
457 return ERROR_MARK;
460 if (COMPARISON_CLASS_P (cond))
462 *op0 = TREE_OPERAND (cond, 0);
463 *op1 = TREE_OPERAND (cond, 1);
464 return TREE_CODE (cond);
467 return ERROR_MARK;
470 /* Returns the fold of predicate C1 OR C2 at location LOC. */
472 static tree
473 fold_or_predicates (location_t loc, tree c1, tree c2)
475 tree op1a, op1b, op2a, op2b;
476 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
477 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
479 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
481 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
482 code2, op2a, op2b);
483 if (t)
484 return t;
487 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
490 /* Returns either a COND_EXPR or the folded expression if the folded
491 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
492 a constant or a SSA_NAME. */
494 static tree
495 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
497 /* If COND is comparison r != 0 and r has boolean type, convert COND
498 to SSA_NAME to accept by vect bool pattern. */
499 if (TREE_CODE (cond) == NE_EXPR)
501 tree op0 = TREE_OPERAND (cond, 0);
502 tree op1 = TREE_OPERAND (cond, 1);
503 if (TREE_CODE (op0) == SSA_NAME
504 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
505 && (integer_zerop (op1)))
506 cond = op0;
509 gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
510 type, cond, rhs, lhs);
511 if (cexpr.resimplify (NULL, follow_all_ssa_edges))
513 if (gimple_simplified_result_is_gimple_val (&cexpr))
514 return cexpr.ops[0];
515 else if (cexpr.code == ABS_EXPR)
516 return build1 (ABS_EXPR, type, cexpr.ops[0]);
517 else if (cexpr.code == MIN_EXPR
518 || cexpr.code == MAX_EXPR)
519 return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
522 return build3 (COND_EXPR, type, cond, rhs, lhs);
525 /* Add condition NC to the predicate list of basic block BB. LOOP is
526 the loop to be if-converted. Use predicate of cd-equivalent block
527 for join bb if it exists: we call basic blocks bb1 and bb2
528 cd-equivalent if they are executed under the same condition. */
530 static inline void
531 add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
533 tree bc, *tp;
534 basic_block dom_bb;
536 if (is_true_predicate (nc))
537 return;
539 /* If dominance tells us this basic block is always executed,
540 don't record any predicates for it. */
541 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
542 return;
544 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
545 /* We use notion of cd equivalence to get simpler predicate for
546 join block, e.g. if join block has 2 predecessors with predicates
547 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
548 p1 & p2 | p1 & !p2. */
549 if (dom_bb != loop->header
550 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
552 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
553 bc = bb_predicate (dom_bb);
554 if (!is_true_predicate (bc))
555 set_bb_predicate (bb, bc);
556 else
557 gcc_assert (is_true_predicate (bb_predicate (bb)));
558 if (dump_file && (dump_flags & TDF_DETAILS))
559 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
560 dom_bb->index, bb->index);
561 return;
564 if (!is_predicated (bb))
565 bc = nc;
566 else
568 bc = bb_predicate (bb);
569 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
570 if (is_true_predicate (bc))
572 reset_bb_predicate (bb);
573 return;
577 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
578 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
579 tp = &TREE_OPERAND (bc, 0);
580 else
581 tp = &bc;
582 if (!is_gimple_val (*tp))
584 gimple_seq stmts;
585 *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
586 add_bb_predicate_gimplified_stmts (bb, stmts);
588 set_bb_predicate (bb, bc);
591 /* Add the condition COND to the previous condition PREV_COND, and add
592 this to the predicate list of the destination of edge E. LOOP is
593 the loop to be if-converted. */
595 static void
596 add_to_dst_predicate_list (class loop *loop, edge e,
597 tree prev_cond, tree cond)
599 if (!flow_bb_inside_loop_p (loop, e->dest))
600 return;
602 if (!is_true_predicate (prev_cond))
603 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
604 prev_cond, cond);
606 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
607 add_to_predicate_list (loop, e->dest, cond);
610 /* Return true if one of the successor edges of BB exits LOOP. */
612 static bool
613 bb_with_exit_edge_p (const class loop *loop, basic_block bb)
615 edge e;
616 edge_iterator ei;
618 FOR_EACH_EDGE (e, ei, bb->succs)
619 if (loop_exit_edge_p (loop, e))
620 return true;
622 return false;
625 /* Given PHI which has more than two arguments, this function checks if
626 it's if-convertible by degenerating its arguments. Specifically, if
627 below two conditions are satisfied:
629 1) Number of PHI arguments with different values equals to 2 and one
630 argument has the only occurrence.
631 2) The edge corresponding to the unique argument isn't critical edge.
633 Such PHI can be handled as PHIs have only two arguments. For example,
634 below PHI:
636 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
638 can be transformed into:
640 res = (predicate of e3) ? A_2 : A_1;
642 Return TRUE if it is the case, FALSE otherwise. */
644 static bool
645 phi_convertible_by_degenerating_args (gphi *phi)
647 edge e;
648 tree arg, t1 = NULL, t2 = NULL;
649 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
650 unsigned int num_args = gimple_phi_num_args (phi);
652 gcc_assert (num_args > 2);
654 for (i = 0; i < num_args; i++)
656 arg = gimple_phi_arg_def (phi, i);
657 if (t1 == NULL || operand_equal_p (t1, arg, 0))
659 n1++;
660 i1 = i;
661 t1 = arg;
663 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
665 n2++;
666 i2 = i;
667 t2 = arg;
669 else
670 return false;
673 if (n1 != 1 && n2 != 1)
674 return false;
676 /* Check if the edge corresponding to the unique arg is critical. */
677 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
678 if (EDGE_COUNT (e->src->succs) > 1)
679 return false;
681 return true;
684 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
685 and it belongs to basic block BB. Note at this point, it is sure
686 that PHI is if-convertible. This function updates global variable
687 ANY_COMPLICATED_PHI if PHI is complicated. */
689 static bool
690 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
692 if (dump_file && (dump_flags & TDF_DETAILS))
694 fprintf (dump_file, "-------------------------\n");
695 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
698 if (bb != loop->header
699 && gimple_phi_num_args (phi) > 2
700 && !phi_convertible_by_degenerating_args (phi))
701 any_complicated_phi = true;
703 return true;
706 /* Records the status of a data reference. This struct is attached to
707 each DR->aux field. */
709 struct ifc_dr {
710 bool rw_unconditionally;
711 bool w_unconditionally;
712 bool written_at_least_once;
714 tree rw_predicate;
715 tree w_predicate;
716 tree base_w_predicate;
719 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
720 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
721 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
722 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
724 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
725 HASH tables. While storing them in HASH table, it checks if the
726 reference is unconditionally read or written and stores that as a flag
727 information. For base reference it checks if it is written atlest once
728 unconditionally and stores it as flag information along with DR.
729 In other words for every data reference A in STMT there exist other
730 accesses to a data reference with the same base with predicates that
731 add up (OR-up) to the true predicate: this ensures that the data
732 reference A is touched (read or written) on every iteration of the
733 if-converted loop. */
734 static void
735 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
738 data_reference_p *master_dr, *base_master_dr;
739 tree base_ref = DR_BASE_OBJECT (a);
740 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
741 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
742 bool exist1, exist2;
744 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
745 if (!exist1)
746 *master_dr = a;
748 if (DR_IS_WRITE (a))
750 IFC_DR (*master_dr)->w_predicate
751 = fold_or_predicates (UNKNOWN_LOCATION, ca,
752 IFC_DR (*master_dr)->w_predicate);
753 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
754 DR_W_UNCONDITIONALLY (*master_dr) = true;
756 IFC_DR (*master_dr)->rw_predicate
757 = fold_or_predicates (UNKNOWN_LOCATION, ca,
758 IFC_DR (*master_dr)->rw_predicate);
759 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
760 DR_RW_UNCONDITIONALLY (*master_dr) = true;
762 if (DR_IS_WRITE (a))
764 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
765 if (!exist2)
766 *base_master_dr = a;
767 IFC_DR (*base_master_dr)->base_w_predicate
768 = fold_or_predicates (UNKNOWN_LOCATION, ca,
769 IFC_DR (*base_master_dr)->base_w_predicate);
770 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
771 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
775 /* Return TRUE if can prove the index IDX of an array reference REF is
776 within array bound. Return false otherwise. */
778 static bool
779 idx_within_array_bound (tree ref, tree *idx, void *dta)
781 wi::overflow_type overflow;
782 widest_int niter, valid_niter, delta, wi_step;
783 tree ev, init, step;
784 tree low, high;
785 class loop *loop = (class loop*) dta;
787 /* Only support within-bound access for array references. */
788 if (TREE_CODE (ref) != ARRAY_REF)
789 return false;
791 /* For arrays that might have flexible sizes, it is not guaranteed that they
792 do not extend over their declared size. */
793 if (array_ref_flexible_size_p (ref))
794 return false;
796 ev = analyze_scalar_evolution (loop, *idx);
797 ev = instantiate_parameters (loop, ev);
798 init = initial_condition (ev);
799 step = evolution_part_in_loop_num (ev, loop->num);
801 if (!init || TREE_CODE (init) != INTEGER_CST
802 || (step && TREE_CODE (step) != INTEGER_CST))
803 return false;
805 low = array_ref_low_bound (ref);
806 high = array_ref_up_bound (ref);
808 /* The case of nonconstant bounds could be handled, but it would be
809 complicated. */
810 if (TREE_CODE (low) != INTEGER_CST
811 || !high || TREE_CODE (high) != INTEGER_CST)
812 return false;
814 /* Check if the intial idx is within bound. */
815 if (wi::to_widest (init) < wi::to_widest (low)
816 || wi::to_widest (init) > wi::to_widest (high))
817 return false;
819 /* The idx is always within bound. */
820 if (!step || integer_zerop (step))
821 return true;
823 if (!max_loop_iterations (loop, &niter))
824 return false;
826 if (wi::to_widest (step) < 0)
828 delta = wi::to_widest (init) - wi::to_widest (low);
829 wi_step = -wi::to_widest (step);
831 else
833 delta = wi::to_widest (high) - wi::to_widest (init);
834 wi_step = wi::to_widest (step);
837 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
838 /* The iteration space of idx is within array bound. */
839 if (!overflow && niter <= valid_niter)
840 return true;
842 return false;
845 /* Return TRUE if ref is a within bound array reference. */
847 bool
848 ref_within_array_bound (gimple *stmt, tree ref)
850 class loop *loop = loop_containing_stmt (stmt);
852 gcc_assert (loop != NULL);
853 return for_each_index (&ref, idx_within_array_bound, loop);
857 /* Given a memory reference expression T, return TRUE if base object
858 it refers to is writable. The base object of a memory reference
859 is the main object being referenced, which is returned by function
860 get_base_address. */
862 static bool
863 base_object_writable (tree ref)
865 tree base_tree = get_base_address (ref);
867 return (base_tree
868 && DECL_P (base_tree)
869 && decl_binds_to_current_def_p (base_tree)
870 && !TREE_READONLY (base_tree));
873 /* Return true when the memory references of STMT won't trap in the
874 if-converted code. There are two things that we have to check for:
876 - writes to memory occur to writable memory: if-conversion of
877 memory writes transforms the conditional memory writes into
878 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
879 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
880 be executed at all in the original code, it may be a readonly
881 memory. To check that A is not const-qualified, we check that
882 there exists at least an unconditional write to A in the current
883 function.
885 - reads or writes to memory are valid memory accesses for every
886 iteration. To check that the memory accesses are correctly formed
887 and that we are allowed to read and write in these locations, we
888 check that the memory accesses to be if-converted occur at every
889 iteration unconditionally.
891 Returns true for the memory reference in STMT, same memory reference
892 is read or written unconditionally atleast once and the base memory
893 reference is written unconditionally once. This is to check reference
894 will not write fault. Also retuns true if the memory reference is
895 unconditionally read once then we are conditionally writing to memory
896 which is defined as read and write and is bound to the definition
897 we are seeing. */
898 static bool
899 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
901 /* If DR didn't see a reference here we can't use it to tell
902 whether the ref traps or not. */
903 if (gimple_uid (stmt) == 0)
904 return false;
906 data_reference_p *master_dr, *base_master_dr;
907 data_reference_p a = drs[gimple_uid (stmt) - 1];
909 tree base = DR_BASE_OBJECT (a);
910 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
912 gcc_assert (DR_STMT (a) == stmt);
913 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
914 || DR_INIT (a) || DR_STEP (a));
916 master_dr = innermost_DR_map->get (innermost);
917 gcc_assert (master_dr != NULL);
919 base_master_dr = baseref_DR_map->get (base);
921 /* If a is unconditionally written to it doesn't trap. */
922 if (DR_W_UNCONDITIONALLY (*master_dr))
923 return true;
925 /* If a is unconditionally accessed then ...
927 Even a is conditional access, we can treat it as an unconditional
928 one if it's an array reference and all its index are within array
929 bound. */
930 if (DR_RW_UNCONDITIONALLY (*master_dr)
931 || ref_within_array_bound (stmt, DR_REF (a)))
933 /* an unconditional read won't trap. */
934 if (DR_IS_READ (a))
935 return true;
937 /* an unconditionaly write won't trap if the base is written
938 to unconditionally. */
939 if ((base_master_dr
940 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
941 /* or the base is known to be not readonly. */
942 || base_object_writable (DR_REF (a)))
943 return !ref_can_have_store_data_races (base);
946 return false;
949 /* Return true if STMT could be converted into a masked load or store
950 (conditional load or store based on a mask computed from bb predicate). */
952 static bool
953 ifcvt_can_use_mask_load_store (gimple *stmt)
955 /* Check whether this is a load or store. */
956 tree lhs = gimple_assign_lhs (stmt);
957 bool is_load;
958 tree ref;
959 if (gimple_store_p (stmt))
961 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
962 return false;
963 is_load = false;
964 ref = lhs;
966 else if (gimple_assign_load_p (stmt))
968 is_load = true;
969 ref = gimple_assign_rhs1 (stmt);
971 else
972 return false;
974 if (may_be_nonaddressable_p (ref))
975 return false;
977 /* Mask should be integer mode of the same size as the load/store
978 mode. */
979 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
980 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
981 return false;
983 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
984 return true;
986 return false;
989 /* Return true if STMT could be converted from an operation that is
990 unconditional to one that is conditional on a bb predicate mask. */
992 static bool
993 ifcvt_can_predicate (gimple *stmt)
995 basic_block bb = gimple_bb (stmt);
997 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
998 || bb->loop_father->dont_vectorize
999 || gimple_has_volatile_ops (stmt))
1000 return false;
1002 if (gimple_assign_single_p (stmt))
1003 return ifcvt_can_use_mask_load_store (stmt);
1005 tree_code code = gimple_assign_rhs_code (stmt);
1006 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
1007 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
1008 if (!types_compatible_p (lhs_type, rhs_type))
1009 return false;
1010 internal_fn cond_fn = get_conditional_internal_fn (code);
1011 return (cond_fn != IFN_LAST
1012 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
1015 /* Return true when STMT is if-convertible.
1017 GIMPLE_ASSIGN statement is not if-convertible if,
1018 - it is not movable,
1019 - it could trap,
1020 - LHS is not var decl. */
1022 static bool
1023 if_convertible_gimple_assign_stmt_p (gimple *stmt,
1024 vec<data_reference_p> refs)
1026 tree lhs = gimple_assign_lhs (stmt);
1028 if (dump_file && (dump_flags & TDF_DETAILS))
1030 fprintf (dump_file, "-------------------------\n");
1031 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
/* Only scalar (register type) results are handled.  */
1034 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1035 return false;
1037 /* Some of these constraints might be too conservative. */
1038 if (stmt_ends_bb_p (stmt)
1039 || gimple_has_volatile_ops (stmt)
1040 || (TREE_CODE (lhs) == SSA_NAME
1041 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1042 || gimple_has_side_effects (stmt))
1044 if (dump_file && (dump_flags & TDF_DETAILS))
1045 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1046 return false;
1049 /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1050 in between if_convertible_loop_p and combine_blocks
1051 we can perform loop versioning. */
1052 gimple_set_plf (stmt, GF_PLF_2, false);
/* A statement that could trap when executed unconditionally is still
   if-convertible when it can be predicated; GF_PLF_2 marks it so and
   NEED_TO_PREDICATE forces the predication setup.  Otherwise give up.  */
1054 if ((! gimple_vuse (stmt)
1055 || gimple_could_trap_p_1 (stmt, false, false)
1056 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1057 && gimple_could_trap_p (stmt))
1059 if (ifcvt_can_predicate (stmt))
1061 gimple_set_plf (stmt, GF_PLF_2, true);
1062 need_to_predicate = true;
1063 return true;
1065 if (dump_file && (dump_flags & TDF_DETAILS))
1066 fprintf (dump_file, "tree could trap...\n");
1067 return false;
1069 else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1070 || POINTER_TYPE_P (TREE_TYPE (lhs)))
1071 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
1072 && arith_code_with_undefined_signed_overflow
1073 (gimple_assign_rhs_code (stmt)))
1074 /* We have to rewrite stmts with undefined overflow. */
1075 need_to_rewrite_undefined = true;
1077 /* When if-converting stores force versioning, likewise if we
1078 ended up generating store data races. */
1079 if (gimple_vdef (stmt))
1080 need_to_predicate = true;
1082 return true;
1085 /* Return true when SW switch statement is equivalent to cond, that
1086 all non default labels point to the same label.
1088 Fallthrough is not checked for and could even happen
1089 with cond (using goto), so is handled.
1091 This is intended for switches created by the if-switch-conversion
1092 pass, but can handle some programmer supplied cases too. */
1094 static bool
1095 if_convertible_switch_p (gswitch *sw)
1097 if (gimple_switch_num_labels (sw) <= 1)
1098 return false;
1099 tree label = CASE_LABEL (gimple_switch_label (sw, 1));
1100 for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
1102 if (CASE_LABEL (gimple_switch_label (sw, i)) != label)
1103 return false;
1105 return true;
1108 /* Return true when STMT is if-convertible.
1110 A statement is if-convertible if:
1111 - it is an if-convertible GIMPLE_ASSIGN,
1112 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1113 - it is a switch equivalent to COND
1114 - it is builtins call,
1115 - it is a call to a function with a SIMD clone. */
1117 static bool
1118 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1120 switch (gimple_code (stmt))
/* Labels, debug stmts and the block-ending condition are always fine.  */
1122 case GIMPLE_LABEL:
1123 case GIMPLE_DEBUG:
1124 case GIMPLE_COND:
1125 return true;
1127 case GIMPLE_SWITCH:
1128 return if_convertible_switch_p (as_a <gswitch *> (stmt));
1130 case GIMPLE_ASSIGN:
1131 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1133 case GIMPLE_CALL:
1135 /* There are some IFN_s that are used to replace builtins but have the
1136 same semantics. Even if MASK_CALL cannot handle them vectorable_call
1137 will insert the proper selection, so do not block conversion. */
1138 int flags = gimple_call_flags (stmt);
1139 if ((flags & ECF_CONST)
1140 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1141 && gimple_call_combined_fn (stmt) != CFN_LAST)
1142 return true;
1144 tree fndecl = gimple_call_fndecl (stmt);
1145 if (fndecl)
1147 /* We can vectorize some builtins and functions with SIMD
1148 "inbranch" clones. */
1149 struct cgraph_node *node = cgraph_node::get (fndecl);
1150 if (node && node->simd_clones != NULL)
1151 /* Ensure that at least one clone can be "inbranch". */
1152 for (struct cgraph_node *n = node->simd_clones; n != NULL;
1153 n = n->simdclone->next_clone)
1154 if (n->simdclone->inbranch)
1156 gimple_set_plf (stmt, GF_PLF_2, true);
1157 need_to_predicate = true;
1158 return true;
/* Neither a usable combined internal fn nor an "inbranch" SIMD clone:
   this call blocks if-conversion.  */
1162 return false;
1165 default:
1166 /* Don't know what to do with 'em so don't do anything. */
1167 if (dump_file && (dump_flags & TDF_DETAILS))
1169 fprintf (dump_file, "don't know what to do\n");
1170 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1172 return false;
1176 /* Assumes that BB has more than 1 predecessors.
1177 Returns false if at least one successor is not on critical edge
1178 and true otherwise. */
1180 static inline bool
1181 all_preds_critical_p (basic_block bb)
1183 edge e;
1184 edge_iterator ei;
1186 FOR_EACH_EDGE (e, ei, bb->preds)
1187 if (EDGE_COUNT (e->src->succs) == 1)
1188 return false;
1189 return true;
1192 /* Return true when BB is if-convertible. This routine does not check
1193 basic block's statements and phis.
1195 A basic block is not if-convertible if:
1196 - it is non-empty and it is after the exit block (in BFS order),
1197 - it is after the exit block but before the latch,
1198 - its edges are not normal.
1200 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1201 inside LOOP. */
1203 static bool
1204 if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1206 edge e;
1207 edge_iterator ei;
1209 if (dump_file && (dump_flags & TDF_DETAILS))
1210 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
/* More than two successors cannot come from a plain condition (or a
   COND-like switch); reject.  */
1212 if (EDGE_COUNT (bb->succs) > 2)
1213 return false;
/* Control-altering calls do not transfer control normally; reject.  */
1215 if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
1216 if (gimple_call_ctrl_altering_p (call))
1217 return false;
/* EXIT_BB set means the exit block was already encountered in the walk:
   only the latch may follow it, and then only when empty.  */
1219 if (exit_bb)
1221 if (bb != loop->latch)
1223 if (dump_file && (dump_flags & TDF_DETAILS))
1224 fprintf (dump_file, "basic block after exit bb but before latch\n");
1225 return false;
1227 else if (!empty_block_p (bb))
1229 if (dump_file && (dump_flags & TDF_DETAILS))
1230 fprintf (dump_file, "non empty basic block after exit bb\n");
1231 return false;
1233 else if (bb == loop->latch
1234 && bb != exit_bb
1235 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1237 if (dump_file && (dump_flags & TDF_DETAILS))
1238 fprintf (dump_file, "latch is not dominated by exit_block\n");
1239 return false;
1243 /* Be less adventurous and handle only normal edges. */
1244 FOR_EACH_EDGE (e, ei, bb->succs)
1245 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1247 if (dump_file && (dump_flags & TDF_DETAILS))
1248 fprintf (dump_file, "Difficult to handle edges\n");
1249 return false;
1252 return true;
1255 /* Return true when all predecessor blocks of BB are visited. The
1256 VISITED bitmap keeps track of the visited blocks. */
1258 static bool
1259 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1261 edge e;
1262 edge_iterator ei;
1263 FOR_EACH_EDGE (e, ei, bb->preds)
1264 if (!bitmap_bit_p (*visited, e->src->index))
1265 return false;
1267 return true;
1270 /* Get body of a LOOP in suitable order for if-conversion. It is
1271 caller's responsibility to deallocate basic block list.
1272 If-conversion suitable order is, breadth first sort (BFS) order
1273 with an additional constraint: select a block only if all its
1274 predecessors are already selected. */
1276 static basic_block *
1277 get_loop_body_in_if_conv_order (const class loop *loop)
1279 basic_block *blocks, *blocks_in_bfs_order;
1280 basic_block bb;
1281 bitmap visited;
1282 unsigned int index = 0;
1283 unsigned int visited_count = 0;
1285 gcc_assert (loop->num_nodes);
1286 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1288 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1289 visited = BITMAP_ALLOC (NULL);
1291 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
/* Repeatedly sweep the BFS-ordered list, selecting a block once all of
   its predecessors have been selected; restart the sweep from the
   beginning until every block has been placed.  */
1293 index = 0;
1294 while (index < loop->num_nodes)
1296 bb = blocks_in_bfs_order [index];
1298 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1300 free (blocks_in_bfs_order);
1301 BITMAP_FREE (visited);
1302 free (blocks);
1303 return NULL;
1306 if (!bitmap_bit_p (visited, bb->index))
1308 if (pred_blocks_visited_p (bb, &visited)
1309 || bb == loop->header)
1311 /* This block is now visited. */
1312 bitmap_set_bit (visited, bb->index);
1313 blocks[visited_count++] = bb;
1317 index++;
1319 if (index == loop->num_nodes
1320 && visited_count != loop->num_nodes)
1321 /* Not done yet. */
1322 index = 0;
1324 free (blocks_in_bfs_order);
1325 BITMAP_FREE (visited);
1327 /* Go through loop and reject if-conversion or lowering of bitfields if we
1328 encounter statements we do not believe the vectorizer will be able to
1329 handle. If adding a new type of statement here, make sure
1330 'ifcvt_local_dce' is also able to handle it properly. */
1331 for (index = 0; index < loop->num_nodes; index++)
1333 basic_block bb = blocks[index];
1334 gimple_stmt_iterator gsi;
1336 bool may_have_nonlocal_labels
1337 = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1338 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1339 switch (gimple_code (gsi_stmt (gsi)))
1341 case GIMPLE_LABEL:
1342 if (!may_have_nonlocal_labels)
1344 tree label
1345 = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1346 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1348 free (blocks);
1349 return NULL;
1352 /* Fallthru. */
1353 case GIMPLE_ASSIGN:
1354 case GIMPLE_CALL:
1355 case GIMPLE_DEBUG:
1356 case GIMPLE_COND:
1357 case GIMPLE_SWITCH:
1358 gimple_set_uid (gsi_stmt (gsi), 0);
1359 break;
1360 default:
1361 free (blocks);
1362 return NULL;
1365 return blocks;
1368 /* Returns true when the analysis of the predicates for all the basic
1369 blocks in LOOP succeeded.
1371 predicate_bbs first allocates the predicates of the basic blocks.
1372 These fields are then initialized with the tree expressions
1373 representing the predicates under which a basic block is executed
1374 in the LOOP. As the loop->header is executed at each iteration, it
1375 has the "true" predicate. Other statements executed under a
1376 condition are predicated with that condition, for example
1378 | if (x)
1379 | S1;
1380 | else
1381 | S2;
1383 S1 will be predicated with "x", and
1384 S2 will be predicated with "!x". */
1386 static void
1387 predicate_bbs (loop_p loop)
1389 unsigned int i;
/* (Re)initialize the predicate of every block of the loop.  */
1391 for (i = 0; i < loop->num_nodes; i++)
1392 init_bb_predicate (ifc_bbs[i]);
1394 for (i = 0; i < loop->num_nodes; i++)
1396 basic_block bb = ifc_bbs[i];
1397 tree cond;
1399 /* The loop latch and loop exit block are always executed and
1400 have no extra conditions to be processed: skip them. */
1401 if (bb == loop->latch
1402 || bb_with_exit_edge_p (loop, bb))
1404 reset_bb_predicate (bb);
1405 continue;
1408 cond = bb_predicate (bb);
1409 if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
1411 tree c2;
1412 edge true_edge, false_edge;
1413 location_t loc = gimple_location (stmt);
1414 tree c;
1415 /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1416 conditions can remain unfolded because of multiple uses so
1417 try to re-fold here, especially to get precision changing
1418 conversions sorted out. Do not simply fold the stmt since
1419 this is analysis only. When conditions were embedded in
1420 COND_EXPRs those were folded separately before folding the
1421 COND_EXPR but as they are now outside we have to make sure
1422 to fold them. Do it here - another opportunity would be to
1423 fold predicates as they are inserted. */
1424 gimple_match_op cexpr (gimple_match_cond::UNCOND,
1425 gimple_cond_code (stmt),
1426 boolean_type_node,
1427 gimple_cond_lhs (stmt),
1428 gimple_cond_rhs (stmt));
1429 if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1430 && cexpr.code.is_tree_code ()
1431 && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1432 c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1433 cexpr.ops[0], cexpr.ops[1]);
1434 else
1435 c = build2_loc (loc, gimple_cond_code (stmt),
1436 boolean_type_node,
1437 gimple_cond_lhs (stmt),
1438 gimple_cond_rhs (stmt));
1440 /* Add new condition into destination's predicate list. */
1441 extract_true_false_edges_from_block (gimple_bb (stmt),
1442 &true_edge, &false_edge);
1444 /* If C is true, then TRUE_EDGE is taken. */
1445 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1446 unshare_expr (c));
1448 /* If C is false, then FALSE_EDGE is taken. */
1449 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1450 unshare_expr (c));
1451 add_to_dst_predicate_list (loop, false_edge,
1452 unshare_expr (cond), c2);
1454 cond = NULL_TREE;
1457 /* Assumes the limited COND like switches checked for earlier. */
1458 else if (gswitch *sw = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb)))
1460 location_t loc = gimple_location (*gsi_last_bb (bb));
1462 tree default_label = CASE_LABEL (gimple_switch_default_label (sw));
1463 tree cond_label = CASE_LABEL (gimple_switch_label (sw, 1));
1465 edge false_edge = find_edge (bb, label_to_block (cfun, default_label));
1466 edge true_edge = find_edge (bb, label_to_block (cfun, cond_label));
1468 /* Create chain of switch tests for each case. */
1469 tree switch_cond = NULL_TREE;
1470 tree index = gimple_switch_index (sw);
1471 for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
1473 tree label = gimple_switch_label (sw, i);
1474 tree case_cond;
1475 if (CASE_HIGH (label))
1477 tree low = build2_loc (loc, GE_EXPR,
1478 boolean_type_node,
1479 index, CASE_LOW (label));
1480 tree high = build2_loc (loc, LE_EXPR,
1481 boolean_type_node,
1482 index, CASE_HIGH (label));
1483 case_cond = build2_loc (loc, TRUTH_AND_EXPR,
1484 boolean_type_node,
1485 low, high);
1487 else
1488 case_cond = build2_loc (loc, EQ_EXPR,
1489 boolean_type_node,
1490 index,
1491 CASE_LOW (gimple_switch_label (sw, i)));
/* OR this case's test into the condition accumulated so far.  */
1492 if (i > 1)
1493 switch_cond = build2_loc (loc, TRUTH_OR_EXPR,
1494 boolean_type_node,
1495 case_cond, switch_cond);
1496 else
1497 switch_cond = case_cond;
1500 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1501 unshare_expr (switch_cond));
1502 switch_cond = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1503 unshare_expr (switch_cond));
1504 add_to_dst_predicate_list (loop, false_edge,
1505 unshare_expr (cond), switch_cond);
1506 cond = NULL_TREE;
1509 /* If current bb has only one successor, then consider it as an
1510 unconditional goto. */
1511 if (single_succ_p (bb))
1513 basic_block bb_n = single_succ (bb);
1515 /* The successor bb inherits the predicate of its
1516 predecessor. If there is no predicate in the predecessor
1517 bb, then consider the successor bb as always executed. */
1518 if (cond == NULL_TREE)
1519 cond = boolean_true_node;
1521 add_to_predicate_list (loop, bb_n, cond);
1525 /* The loop header is always executed. */
1526 reset_bb_predicate (loop->header);
1527 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1528 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1531 /* Build region by adding loop pre-header and post-header blocks. */
1533 static vec<basic_block>
1534 build_region (class loop *loop)
1536 vec<basic_block> region = vNULL;
1537 basic_block exit_bb = NULL;
1539 gcc_assert (ifc_bbs);
1540 /* The first element is loop pre-header. */
1541 region.safe_push (loop_preheader_edge (loop)->src);
1543 for (unsigned int i = 0; i < loop->num_nodes; i++)
1545 basic_block bb = ifc_bbs[i];
1546 region.safe_push (bb);
1547 /* Find loop postheader. */
1548 edge e;
1549 edge_iterator ei;
1550 FOR_EACH_EDGE (e, ei, bb->succs)
1551 if (loop_exit_edge_p (loop, e))
1553 exit_bb = e->dest;
1554 break;
1557 /* The last element is loop post-header. */
1558 gcc_assert (exit_bb);
1559 region.safe_push (exit_bb);
1560 return region;
1563 /* Return true when LOOP is if-convertible. This is a helper function
1564 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1565 in if_convertible_loop_p. */
1567 static bool
1568 if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1570 unsigned int i;
1571 basic_block exit_bb = NULL;
1572 vec<basic_block> region;
1574 calculate_dominance_info (CDI_DOMINATORS);
1576 for (i = 0; i < loop->num_nodes; i++)
1578 basic_block bb = ifc_bbs[i];
1580 if (!if_convertible_bb_p (loop, bb, exit_bb))
1581 return false;
1583 if (bb_with_exit_edge_p (loop, bb))
1584 exit_bb = bb;
1587 data_reference_p dr;
1589 innermost_DR_map
1590 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1591 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1593 /* Compute post-dominator tree locally. */
1594 region = build_region (loop);
1595 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1597 predicate_bbs (loop);
1599 /* Free post-dominator tree since it is not used after predication. */
1600 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1601 region.release ();
/* Initialize the per-DR if-conversion metadata and hash the memory
   references so (un)conditionally accessed DRs can be recognized.  */
1603 for (i = 0; refs->iterate (i, &dr); i++)
1605 tree ref = DR_REF (dr);
1607 dr->aux = XNEW (struct ifc_dr);
1608 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1609 DR_RW_UNCONDITIONALLY (dr) = false;
1610 DR_W_UNCONDITIONALLY (dr) = false;
1611 IFC_DR (dr)->rw_predicate = boolean_false_node;
1612 IFC_DR (dr)->w_predicate = boolean_false_node;
1613 IFC_DR (dr)->base_w_predicate = boolean_false_node;
/* Give statements with a data reference a non-zero uid (0 means
   "no DR seen yet"; uids were cleared when gathering the body).  */
1614 if (gimple_uid (DR_STMT (dr)) == 0)
1615 gimple_set_uid (DR_STMT (dr), i + 1);
1617 /* If DR doesn't have innermost loop behavior or it's a compound
1618 memory reference, we synthesize its innermost loop behavior
1619 for hashing. */
1620 if (TREE_CODE (ref) == COMPONENT_REF
1621 || TREE_CODE (ref) == IMAGPART_EXPR
1622 || TREE_CODE (ref) == REALPART_EXPR
1623 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1624 || DR_INIT (dr) || DR_STEP (dr)))
1626 while (TREE_CODE (ref) == COMPONENT_REF
1627 || TREE_CODE (ref) == IMAGPART_EXPR
1628 || TREE_CODE (ref) == REALPART_EXPR)
1629 ref = TREE_OPERAND (ref, 0);
1631 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1632 DR_BASE_ADDRESS (dr) = ref;
1634 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1637 for (i = 0; i < loop->num_nodes; i++)
1639 basic_block bb = ifc_bbs[i];
1640 gimple_stmt_iterator itr;
1642 /* Check the if-convertibility of statements in predicated BBs. */
1643 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1644 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1645 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1646 return false;
1649 /* Checking PHIs needs to be done after stmts, as the fact whether there
1650 are any masked loads or stores affects the tests. */
1651 for (i = 0; i < loop->num_nodes; i++)
1653 basic_block bb = ifc_bbs[i];
1654 gphi_iterator itr;
1656 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1657 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1658 return false;
1661 if (dump_file)
1662 fprintf (dump_file, "Applying if-conversion\n");
1664 return true;
1667 /* Return true when LOOP is if-convertible.
1668 LOOP is if-convertible if:
1669 - it is innermost,
1670 - it has two or more basic blocks,
1671 - it has only one exit,
1672 - loop header is not the exit edge,
1673 - if its basic blocks and phi nodes are if convertible. */
1675 static bool
1676 if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1678 edge e;
1679 edge_iterator ei;
1680 bool res = false;
1682 /* Handle only innermost loop. */
1683 if (!loop || loop->inner)
1685 if (dump_file && (dump_flags & TDF_DETAILS))
1686 fprintf (dump_file, "not innermost loop\n");
1687 return false;
1690 /* If only one block, no need for if-conversion. */
1691 if (loop->num_nodes <= 2)
1693 if (dump_file && (dump_flags & TDF_DETAILS))
1694 fprintf (dump_file, "less than 2 basic blocks\n");
1695 return false;
1698 /* If one of the loop header's edge is an exit edge then do not
1699 apply if-conversion. */
1700 FOR_EACH_EDGE (e, ei, loop->header->succs)
1701 if (loop_exit_edge_p (loop, e))
1702 return false;
1704 res = if_convertible_loop_p_1 (loop, refs);
1706 delete innermost_DR_map;
1707 innermost_DR_map = NULL;
1709 delete baseref_DR_map;
1710 baseref_DR_map = NULL;
1712 return res;
1715 /* Return reduc_1 if has_nop.
1717 if (...)
1718 tmp1 = (unsigned type) reduc_1;
1719 tmp2 = tmp1 + rhs2;
1720 reduc_3 = (signed type) tmp2. */
1721 static tree
1722 strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1724 if (!has_nop)
1725 return op;
1727 if (TREE_CODE (op) != SSA_NAME)
1728 return NULL_TREE;
1730 gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1731 if (!stmt
1732 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1733 || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1734 (gimple_assign_rhs1 (stmt))))
1735 return NULL_TREE;
1737 return gimple_assign_rhs1 (stmt);
1740 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1741 which is in predicated basic block.
1742 In fact, the following PHI pattern is searching:
1743 loop-header:
1744 reduc_1 = PHI <..., reduc_2>
1746 if (...)
1747 reduc_3 = ...
1748 reduc_2 = PHI <reduc_1, reduc_3>
1750 ARG_0 and ARG_1 are correspondent PHI arguments.
1751 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1752 EXTENDED is true if PHI has > 2 arguments. */
1754 static bool
1755 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1756 tree *op0, tree *op1, bool extended, bool* has_nop,
1757 gimple **nop_reduc)
1759 tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1760 gimple *stmt;
1761 gimple *header_phi = NULL;
1762 enum tree_code reduction_op;
1763 basic_block bb = gimple_bb (phi);
1764 class loop *loop = bb->loop_father;
1765 edge latch_e = loop_latch_edge (loop);
1766 imm_use_iterator imm_iter;
1767 use_operand_p use_p;
1768 edge e;
1769 edge_iterator ei;
1770 bool result = *has_nop = false;
1771 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1772 return false;
/* Identify which argument is defined by the loop-header PHI (the
   reduction variable) and which by the conditional update stmt.  */
1774 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1776 lhs = arg_1;
1777 header_phi = SSA_NAME_DEF_STMT (arg_0);
1778 stmt = SSA_NAME_DEF_STMT (arg_1);
1780 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1782 lhs = arg_0;
1783 header_phi = SSA_NAME_DEF_STMT (arg_1);
1784 stmt = SSA_NAME_DEF_STMT (arg_0);
1786 else
1787 return false;
1788 if (gimple_bb (header_phi) != loop->header)
1789 return false;
1791 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1792 return false;
1794 if (gimple_code (stmt) != GIMPLE_ASSIGN
1795 || gimple_has_volatile_ops (stmt))
1796 return false;
1798 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1799 return false;
1801 if (!is_predicated (gimple_bb (stmt)))
1802 return false;
1804 /* Check that stmt-block is predecessor of phi-block. */
1805 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1806 if (e->dest == bb)
1808 result = true;
1809 break;
1811 if (!result)
1812 return false;
/* The update result must feed only this PHI.  */
1814 if (!has_single_use (lhs))
1815 return false;
1817 reduction_op = gimple_assign_rhs_code (stmt);
1819 /* Catch something like below
1821 loop-header:
1822 reduc_1 = PHI <..., reduc_2>
1824 if (...)
1825 tmp1 = (unsigned type) reduc_1;
1826 tmp2 = tmp1 + rhs2;
1827 reduc_3 = (signed type) tmp2;
1829 reduc_2 = PHI <reduc_1, reduc_3>
1831 and convert to
1833 reduc_2 = PHI <0, reduc_1>
1834 tmp1 = (unsigned type)reduc_1;
1835 ifcvt = cond_expr ? rhs2 : 0
1836 tmp2 = tmp1 +/- ifcvt;
1837 reduc_1 = (signed type)tmp2; */
1839 if (CONVERT_EXPR_CODE_P (reduction_op))
1841 lhs = gimple_assign_rhs1 (stmt);
1842 if (TREE_CODE (lhs) != SSA_NAME
1843 || !has_single_use (lhs))
1844 return false;
1846 *nop_reduc = stmt;
1847 stmt = SSA_NAME_DEF_STMT (lhs);
1848 if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1849 || !is_gimple_assign (stmt))
1850 return false;
1852 *has_nop = true;
1853 reduction_op = gimple_assign_rhs_code (stmt);
/* Only these operations are accepted as conditional reductions.  */
1856 if (reduction_op != PLUS_EXPR
1857 && reduction_op != MINUS_EXPR
1858 && reduction_op != MULT_EXPR
1859 && reduction_op != BIT_IOR_EXPR
1860 && reduction_op != BIT_XOR_EXPR
1861 && reduction_op != BIT_AND_EXPR)
1862 return false;
1863 r_op1 = gimple_assign_rhs1 (stmt);
1864 r_op2 = gimple_assign_rhs2 (stmt);
1866 r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1867 r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1869 /* Make R_OP1 to hold reduction variable. */
1870 if (r_nop2 == PHI_RESULT (header_phi)
1871 && commutative_tree_code (reduction_op))
1873 std::swap (r_op1, r_op2);
1874 std::swap (r_nop1, r_nop2);
1876 else if (r_nop1 != PHI_RESULT (header_phi))
1877 return false;
1879 if (*has_nop)
1881 /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1882 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1884 gimple *use_stmt = USE_STMT (use_p);
1885 if (is_gimple_debug (use_stmt))
1886 continue;
1887 if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1888 continue;
1889 if (use_stmt != phi)
1890 return false;
1894 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1895 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1897 gimple *use_stmt = USE_STMT (use_p);
1898 if (is_gimple_debug (use_stmt))
1899 continue;
1900 if (use_stmt == stmt)
1901 continue;
1902 if (gimple_code (use_stmt) != GIMPLE_PHI)
1903 return false;
1906 *op0 = r_op1; *op1 = r_op2;
1907 *reduc = stmt;
1908 return true;
1911 /* Converts conditional scalar reduction into unconditional form, e.g.
1912 bb_4
1913 if (_5 != 0) goto bb_5 else goto bb_6
1914 end_bb_4
1915 bb_5
1916 res_6 = res_13 + 1;
1917 end_bb_5
1918 bb_6
1919 # res_2 = PHI <res_13(4), res_6(5)>
1920 end_bb_6
1922 will be converted into sequence
1923 _ifc__1 = _5 != 0 ? 1 : 0;
1924 res_2 = res_13 + _ifc__1;
1925 Argument SWAP tells that arguments of conditional expression should be
1926 swapped.
1927 LOOP_VERSIONED is true if we assume that we versioned the loop for
1928 vectorization. In that case we can create a COND_OP.
1929 Returns rhs of resulting PHI assignment. */
1931 static tree
1932 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1933 tree cond, tree op0, tree op1, bool swap,
1934 bool has_nop, gimple* nop_reduc,
1935 bool loop_versioned)
1937 gimple_stmt_iterator stmt_it;
1938 gimple *new_assign;
1939 tree rhs;
1940 tree rhs1 = gimple_assign_rhs1 (reduc);
1941 tree lhs = gimple_assign_lhs (reduc);
1942 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1943 tree c;
1944 enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
1945 tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
1946 NULL, false);
1947 gimple_seq stmts = NULL;
1949 if (dump_file && (dump_flags & TDF_DETAILS))
1951 fprintf (dump_file, "Found cond scalar reduction.\n");
1952 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1955 /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
1956 The COND_OP will have a neutral_op else value. */
1957 internal_fn ifn;
1958 ifn = get_conditional_internal_fn (reduction_op);
1959 if (loop_versioned && ifn != IFN_LAST
1960 && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
1961 && !swap)
1963 gcall *cond_call = gimple_build_call_internal (ifn, 4,
1964 unshare_expr (cond),
1965 op0, op1, op0)
1966 gsi_insert_before (gsi, cond_call, GSI_SAME_STMT);
1967 gimple_call_set_lhs (cond_call, tmp);
1968 rhs = tmp;
1970 else
1972 /* Build cond expression using COND and constant operand
1973 of reduction rhs. */
1974 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1975 unshare_expr (cond),
1976 swap ? op_nochange : op1,
1977 swap ? op1 : op_nochange);
1978 /* Create assignment stmt and insert it at GSI. */
1979 new_assign = gimple_build_assign (tmp, c);
1980 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1981 /* Build rhs for unconditional increment/decrement/logic_operation. */
1982 rhs = gimple_build (&stmts, reduction_op,
1983 TREE_TYPE (rhs1), op0, tmp);
/* Re-apply the no-op conversion that was stripped from the reduction
   chain and delete the original conversion statement.  */
1986 if (has_nop)
1988 rhs = gimple_convert (&stmts,
1989 TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
1990 stmt_it = gsi_for_stmt (nop_reduc);
1991 gsi_remove (&stmt_it, true);
1992 release_defs (nop_reduc);
1994 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1996 /* Delete original reduction stmt. */
1997 stmt_it = gsi_for_stmt (reduc);
1998 gsi_remove (&stmt_it, true);
1999 release_defs (reduc);
2000 return rhs;
2003 /* Generate a simplified conditional. */
2005 static tree
2006 gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
2008 /* Check if the value is already live in a previous branch. This resolves
2009 nested conditionals from diamond PHI reductions. */
2010 if (TREE_CODE (cond) == SSA_NAME)
2012 gimple *stmt = SSA_NAME_DEF_STMT (cond);
2013 gassign *assign = NULL;
/* NOTE(review): as_a asserts the defining stmt is a gassign; if COND
   can be defined by a PHI or a call this would trip checking builds —
   confirm whether dyn_cast is intended here.  */
2014 if ((assign = as_a <gassign *> (stmt))
2015 && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
2017 tree arg1 = gimple_assign_rhs1 (assign);
2018 tree arg2 = gimple_assign_rhs2 (assign);
/* An operand whose condition is already known active collapses to
   true; otherwise simplify it recursively.  */
2019 if (cond_set.contains ({ arg1, 1 }))
2020 arg1 = boolean_true_node;
2021 else
2022 arg1 = gen_simplified_condition (arg1, cond_set);
2024 if (cond_set.contains ({ arg2, 1 }))
2025 arg2 = boolean_true_node;
2026 else
2027 arg2 = gen_simplified_condition (arg2, cond_set);
2029 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
2032 return cond;
2035 /* Structure used to track meta-data on PHI arguments used to generate
2036 most efficient comparison sequence to flatten a PHI node. */
2038 typedef struct ifcvt_arg_entry
2040 /* The PHI node argument value. */
2041 tree arg;
2043 /* The number of compares required to reach this PHI node from start of the
2044 BB being if-converted. */
2045 unsigned num_compares;
2047 /* The number of times this PHI node argument appears in the current PHI
2048 node. */
2049 unsigned occurs;
2051 /* The indices at which this PHI arg occurs inside the PHI node. */
2052 vec <int> *indexes;
2053 } ifcvt_arg_entry_t;
2055 /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
2056 as to whether the condition is inverted. */
2058 static tree
2059 gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t &arg,
2060 gimple_stmt_iterator *gsi,
2061 scalar_cond_masked_set_type &cond_set, bool *invert)
2063 int len;
2064 int i;
2065 tree cond = NULL_TREE;
2066 tree c;
2067 edge e;
2069 *invert = false;
2070 len = arg.indexes->length ();
2071 gcc_assert (len > 0);
2072 for (i = 0; i < len; i++)
2074 e = gimple_phi_arg_edge (phi, (*arg.indexes)[i]);
2075 c = bb_predicate (e->src);
/* An always-true predecessor predicate makes the whole OR true,
   so stop early.  */
2076 if (is_true_predicate (c))
2078 cond = c;
2079 break;
2081 /* If we have just a single inverted predicate, signal that and
2082 instead invert the COND_EXPR arms. */
2083 if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
2085 c = TREE_OPERAND (c, 0);
2086 *invert = true;
2089 c = gen_simplified_condition (c, cond_set);
2090 c = force_gimple_operand_gsi (gsi, unshare_expr (c),
2091 true, NULL_TREE, true, GSI_SAME_STMT);
2092 if (cond != NULL_TREE)
2094 /* Must build OR expression. */
2095 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
2096 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2097 NULL_TREE, true, GSI_SAME_STMT);
2099 else
2100 cond = c;
2102 /* Register the new possibly simplified conditional. When more than 2
2103 entries in a phi node we chain entries in the false branch, so the
2104 inverted condition is active. */
2105 scalar_cond_masked_key pred_cond ({ cond, 1 });
2106 if (!*invert)
2107 pred_cond.inverted_p = !pred_cond.inverted_p;
2108 cond_set.add (pred_cond);
2110 gcc_assert (cond != NULL_TREE);
2111 return cond;
2114 /* Create the smallest nested conditional possible. On pre-order we record
2115 which conditionals are live, and on post-order rewrite the chain by removing
2116 already active conditions.
2118 As an example we simplify:
2120 _7 = a_10 < 0;
2121 _21 = a_10 >= 0;
2122 _22 = a_10 < e_11(D);
2123 _23 = _21 & _22;
2124 _ifc__42 = _23 ? t_13 : 0;
2125 t_6 = _7 ? 1 : _ifc__42
2127 into
2129 _7 = a_10 < 0;
2130 _22 = a_10 < e_11(D);
2131 _ifc__42 = _22 ? t_13 : 0;
2132 t_6 = _7 ? 1 : _ifc__42;
2134 which produces better code. */
2136 static tree
2137 gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
2138 scalar_cond_masked_set_type &cond_set, tree type,
2139 gimple **res_stmt, tree lhs0,
2140 vec<struct ifcvt_arg_entry> &args, unsigned idx)
/* Base case: past the last argument, the chain terminates with the
   final argument's value (its condition is implicit).  */
2142 if (idx == args.length ())
2143 return args[idx - 1].arg;
2145 bool invert;
2146 tree cond = gen_phi_arg_condition (phi, args[idx - 1], gsi, cond_set,
2147 &invert);
/* Recurse first so the inner (false-branch) value of this COND_EXPR
   is already materialized.  */
2148 tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt, lhs0,
2149 args, idx + 1);
2151 unsigned prev = idx;
2152 unsigned curr = prev - 1;
2153 tree arg0 = args[curr].arg;
2154 tree rhs, lhs;
/* Inner links of the chain get temporaries; the outermost link writes
   the PHI result LHS0.  */
2155 if (idx > 1)
2156 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2157 else
2158 lhs = lhs0;
2160 if (invert)
2161 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2162 arg1, arg0);
2163 else
2164 rhs = fold_build_cond_expr (type, unshare_expr (cond),
2165 arg0, arg1);
2166 gassign *new_stmt = gimple_build_assign (lhs, rhs);
2167 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2168 update_stmt (new_stmt);
2169 *res_stmt = new_stmt;
2170 return lhs;
2173 /* When flattening a PHI node we have a choice of which conditions to test to
2174 for all the paths from the start of the dominator block of the BB with the
2175 PHI node. If the PHI node has X arguments we have to only test X - 1
2176 conditions as the last one is implicit. It does matter which conditions we
2177 test first. We should test the shortest condition first (distance here is
2178 measured in the number of logical operators in the condition) and the
2179 longest one last. This allows us to skip testing the most expensive
2180 condition. To accomplish this we need to sort the conditions. P1 and P2
2181 are sorted first based on the number of logical operations (num_compares)
2182 and then by how often they occur in the PHI node. */
2184 static int
2185 cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
2187 const ifcvt_arg_entry sval1 = *(const ifcvt_arg_entry *)p1;
2188 const ifcvt_arg_entry sval2 = *(const ifcvt_arg_entry *)p2;
2190 if (sval1.num_compares < sval2.num_compares)
2191 return -1;
2192 else if (sval1.num_compares > sval2.num_compares)
2193 return 1;
2195 if (sval1.occurs < sval2.occurs)
2196 return -1;
2197 else if (sval1.occurs > sval2.occurs)
2198 return 1;
2200 return 0;
2203 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2204 This routine can handle PHI nodes with more than two arguments.
2206 For example,
2207 S1: A = PHI <x1(1), x2(5)>
2208 is converted into,
2209 S2: A = cond ? x1 : x2;
2211 The generated code is inserted at GSI that points to the top of
2212 basic block's statement list.
2213 If PHI node has more than two arguments a chain of conditional
2214 expression is produced.
2215 LOOP_VERSIONED should be true if we know that the loop was versioned for
2216 vectorization. */
2219 static void
2220 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
2222 gimple *new_stmt = NULL, *reduc, *nop_reduc;
2223 tree rhs, res, arg0, arg1, op0, op1, scev;
2224 tree cond;
2225 unsigned int index0;
2226 edge e;
2227 basic_block bb;
2228 unsigned int i;
2229 bool has_nop;
2231 res = gimple_phi_result (phi);
2232 if (virtual_operand_p (res))
2233 return;
/* A PHI with identical arguments, or one whose scalar evolution is fully
   determined, degenerates to a plain assignment.  */
2235 if ((rhs = degenerate_phi_result (phi))
2236 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
2237 res))
2238 && !chrec_contains_undetermined (scev)
2239 && scev != res
2240 && (rhs = gimple_phi_arg_def (phi, 0))))
2242 if (dump_file && (dump_flags & TDF_DETAILS))
2244 fprintf (dump_file, "Degenerate phi!\n");
2245 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
2247 new_stmt = gimple_build_assign (res, rhs);
2248 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2249 update_stmt (new_stmt);
2250 return;
2253 bb = gimple_bb (phi);
2254 /* Keep track of conditionals already seen. */
2255 scalar_cond_masked_set_type cond_set;
2256 if (EDGE_COUNT (bb->preds) == 2)
2258 /* Predicate ordinary PHI node with 2 arguments. */
2259 edge first_edge, second_edge;
2260 basic_block true_bb;
2261 first_edge = EDGE_PRED (bb, 0);
2262 second_edge = EDGE_PRED (bb, 1);
2263 cond = bb_predicate (first_edge->src);
2264 cond_set.add ({ cond, 1 });
/* Prefer the edge whose predicate is not inverted as the "true" edge.  */
2265 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2266 std::swap (first_edge, second_edge);
2267 if (EDGE_COUNT (first_edge->src->succs) > 1)
2269 cond = bb_predicate (second_edge->src);
2270 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2271 cond = TREE_OPERAND (cond, 0);
2272 else
2273 first_edge = second_edge;
2275 else
2276 cond = bb_predicate (first_edge->src);
2278 /* Gimplify the condition to a valid cond-expr conditional operand. */
2279 cond = gen_simplified_condition (cond, cond_set);
2280 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2281 NULL_TREE, true, GSI_SAME_STMT);
2282 true_bb = first_edge->src;
/* Pick ARG0 as the argument flowing in from TRUE_BB.  */
2283 if (EDGE_PRED (bb, 1)->src == true_bb)
2285 arg0 = gimple_phi_arg_def (phi, 1);
2286 arg1 = gimple_phi_arg_def (phi, 0);
2288 else
2290 arg0 = gimple_phi_arg_def (phi, 0);
2291 arg1 = gimple_phi_arg_def (phi, 1);
2293 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2294 &op0, &op1, false, &has_nop,
2295 &nop_reduc))
2297 /* Convert reduction stmt into vectorizable form. */
2298 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2299 true_bb != gimple_bb (reduc),
2300 has_nop, nop_reduc,
2301 loop_versioned)
2302 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2304 else
2305 /* Build new RHS using selected condition and arguments. */
2306 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2307 arg0, arg1);
2308 new_stmt = gimple_build_assign (res, rhs);
2309 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2310 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2311 if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2313 new_stmt = gsi_stmt (new_gsi);
2314 update_stmt (new_stmt);
2317 if (dump_file && (dump_flags & TDF_DETAILS))
2319 fprintf (dump_file, "new phi replacement stmt\n");
2320 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2322 return;
2325 /* Create hashmap for PHI node which contains the vector of argument
2326 indexes having the same value. */
2327 bool swap = false;
2328 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2329 unsigned int num_args = gimple_phi_num_args (phi);
2330 /* Vector of different PHI argument values. */
2331 auto_vec<ifcvt_arg_entry_t> args;
2333 /* Compute phi_arg_map, determine the list of unique PHI args and the indices
2334 where they are in the PHI node. The indices will be used to determine
2335 the conditions to apply and their complexity. */
2336 for (i = 0; i < num_args; i++)
2338 tree arg;
2340 arg = gimple_phi_arg_def (phi, i);
2341 if (!phi_arg_map.get (arg))
2342 args.safe_push ({ arg, 0, 0, NULL });
2343 phi_arg_map.get_or_insert (arg).safe_push (i);
2346 /* Determine element with max number of occurrences and complexity. Looking
2347 at only number of occurrences as a measure for complexity isn't enough as
2348 all usages can be unique but the comparisons to reach the PHI node differ
2349 per branch. */
2350 for (unsigned i = 0; i < args.length (); i++)
2352 unsigned int len = 0;
2353 vec<int> *indices = phi_arg_map.get (args[i].arg);
2354 for (int index : *indices)
2356 edge e = gimple_phi_arg_edge (phi, index);
2357 len += get_bb_num_predicate_stmts (e->src);
2360 unsigned occur = indices->length ();
2361 if (dump_file && (dump_flags & TDF_DETAILS))
2362 fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
2363 args[i].num_compares = len;
2364 args[i].occurs = occur;
2365 args[i].indexes = indices;
2368 /* Sort elements based on rankings ARGS. */
2369 args.stablesort (cmp_arg_entry, NULL);
2371 /* Handle one special case when the number of arguments with different
2372 values equals 2 and one argument has a single occurrence. Such a PHI
2373 can be handled as if it had only 2 arguments. */
2374 if (args.length () == 2
2375 && args[0].indexes->length () == 1)
2377 index0 = (*args[0].indexes)[0];
2378 arg0 = args[0].arg;
2379 arg1 = args[1].arg;
2380 e = gimple_phi_arg_edge (phi, index0);
2381 cond = bb_predicate (e->src);
2382 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2384 swap = true;
2385 cond = TREE_OPERAND (cond, 0);
2387 /* Gimplify the condition to a valid cond-expr conditional operand. */
2388 cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2389 NULL_TREE, true, GSI_SAME_STMT);
2390 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2391 &op0, &op1, true, &has_nop, &nop_reduc)))
2392 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2393 swap ? arg1 : arg0,
2394 swap ? arg0 : arg1);
2395 else
2397 /* Convert reduction stmt into vectorizable form. */
2398 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2399 swap, has_nop, nop_reduc,
2400 loop_versioned);
2401 redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2403 new_stmt = gimple_build_assign (res, rhs);
2404 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2405 update_stmt (new_stmt);
2407 else
2409 /* Common case. */
2410 tree type = TREE_TYPE (gimple_phi_result (phi));
2411 gen_phi_nest_statement (phi, gsi, cond_set, type, &new_stmt, res,
2412 args, 1);
2415 if (dump_file && (dump_flags & TDF_DETAILS))
2417 fprintf (dump_file, "new extended phi replacement stmt\n");
2418 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2422 /* Replaces in LOOP all the scalar phi nodes other than those in the
2423 LOOP->header block with conditional modify expressions.
2424 LOOP_VERSIONED should be true if we know that the loop was versioned for
2425 vectorization. */
2427 static void
2428 predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
2430 basic_block bb;
2431 unsigned int orig_loop_num_nodes = loop->num_nodes;
2432 unsigned int i;
2434 for (i = 1; i < orig_loop_num_nodes; i++)
2436 gphi *phi;
2437 gimple_stmt_iterator gsi;
2438 gphi_iterator phi_gsi;
2439 bb = ifc_bbs[i];
2441 if (bb == loop->header)
2442 continue;
2444 phi_gsi = gsi_start_phis (bb);
2445 if (gsi_end_p (phi_gsi))
2446 continue;
2448 gsi = gsi_after_labels (bb);
2449 while (!gsi_end_p (phi_gsi))
2451 phi = phi_gsi.phi ();
2452 if (virtual_operand_p (gimple_phi_result (phi)))
2453 gsi_next (&phi_gsi);
2454 else
2456 predicate_scalar_phi (phi, &gsi, loop_versioned);
2457 remove_phi_node (&phi_gsi, false);
2463 /* Insert in each basic block of LOOP the statements produced by the
2464 gimplification of the predicates. */
2466 static void
2467 insert_gimplified_predicates (loop_p loop)
2469 unsigned int i;
2471 for (i = 0; i < loop->num_nodes; i++)
2473 basic_block bb = ifc_bbs[i];
2474 gimple_seq stmts;
2475 if (!is_predicated (bb))
2476 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2477 if (!is_predicated (bb))
2479 /* Do not insert statements for a basic block that is not
2480 predicated. Also make sure that the predicate of the
2481 basic block is set to true. */
2482 reset_bb_predicate (bb);
2483 continue;
2486 stmts = bb_predicate_gimplified_stmts (bb);
2487 if (stmts)
2489 if (need_to_predicate)
2491 /* Insert the predicate of the BB just after the label,
2492 as the if-conversion of memory writes will use this
2493 predicate. */
2494 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2495 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2497 else
2499 /* Insert the predicate of the BB at the end of the BB
2500 as this would reduce the register pressure: the only
2501 use of this predicate will be in successor BBs. */
2502 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2504 if (gsi_end_p (gsi)
2505 || stmt_ends_bb_p (gsi_stmt (gsi)))
2506 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2507 else
2508 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2511 /* Once the sequence is code generated, set it to NULL. */
2512 set_bb_predicate_gimplified_stmts (bb, NULL, true);
2517 /* Helper function for predicate_statements. Returns index of existent
2518 mask if it was created for given SIZE and -1 otherwise. */
2520 static int
2521 mask_exists (int size, const vec<int> &vec)
2523 unsigned int ix;
2524 int v;
2525 FOR_EACH_VEC_ELT (vec, ix, v)
2526 if (v == size)
2527 return (int) ix;
2528 return -1;
2531 /* Helper function for predicate_statements. STMT is a memory read or
2532 write and it needs to be predicated by MASK. Return a statement
2533 that does so. */
2535 static gimple *
2536 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2538 gcall *new_stmt;
2540 tree lhs = gimple_assign_lhs (stmt);
2541 tree rhs = gimple_assign_rhs1 (stmt);
/* An SSA_NAME LHS means STMT is a load (the memory reference is the RHS);
   otherwise it is a store and the reference is the LHS.  */
2542 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2543 mark_addressable (ref);
2544 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2545 true, NULL_TREE, true, GSI_SAME_STMT);
/* PTR encodes the alias pointer type and object alignment, as expected
   by the IFN_MASK_LOAD/IFN_MASK_STORE internal functions.  */
2546 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2547 get_object_alignment (ref));
2548 /* Copy points-to info if possible. */
2549 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2550 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2551 ref);
2552 if (TREE_CODE (lhs) == SSA_NAME)
2554 new_stmt
2555 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2556 ptr, mask);
2557 gimple_call_set_lhs (new_stmt, lhs);
2558 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2560 else
2562 new_stmt
2563 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2564 mask, rhs);
/* A store carries both a VUSE and a VDEF; move them wholesale.  */
2565 gimple_move_vops (new_stmt, stmt);
2567 gimple_call_set_nothrow (new_stmt, true);
2568 return new_stmt;
2571 /* STMT uses OP_LHS. Check whether it is equivalent to:
2573 ... = OP_MASK ? OP_LHS : X;
2575 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2576 known to have value OP_COND. */
2578 static tree
2579 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2580 tree op_lhs)
2582 gassign *assign = dyn_cast <gassign *> (stmt);
2583 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2584 return NULL_TREE;
2586 tree use_cond = gimple_assign_rhs1 (assign);
2587 tree if_true = gimple_assign_rhs2 (assign);
2588 tree if_false = gimple_assign_rhs3 (assign);
2590 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2591 && if_true == op_lhs)
2592 return if_false;
2594 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2595 return if_true;
2597 return NULL_TREE;
2600 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2601 the set of SSA names defined earlier in STMT's block. */
2603 static bool
2604 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2605 tree value)
2607 if (is_gimple_min_invariant (value))
2608 return true;
2610 if (TREE_CODE (value) == SSA_NAME)
2612 if (SSA_NAME_IS_DEFAULT_DEF (value))
2613 return true;
2615 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2616 basic_block use_bb = gimple_bb (stmt);
2617 return (def_bb == use_bb
2618 ? ssa_names->contains (value)
2619 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2622 return false;
2625 /* Helper function for predicate_statements. STMT is a potentially-trapping
2626 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2627 has value COND. Return a statement that does so. SSA_NAMES is the set of
2628 SSA names defined earlier in STMT's block. */
2630 static gimple *
2631 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2632 hash_set<tree_ssa_name_hash> *ssa_names)
2634 tree lhs = gimple_assign_lhs (stmt);
2635 tree_code code = gimple_assign_rhs_code (stmt);
2636 unsigned int nops = gimple_num_ops (stmt);
2637 internal_fn cond_fn = get_conditional_internal_fn (code);
2639 /* Construct the arguments to the conditional internal function:
   arg 0 is the mask, args 1..nops-1 are the original operands, and the
   final slot is the "else" value, filled in below.  */
2640 auto_vec<tree, 8> args;
2641 args.safe_grow (nops + 1, true);
2642 args[0] = mask;
2643 for (unsigned int i = 1; i < nops; ++i)
2644 args[i] = gimple_op (stmt, i);
2645 args[nops] = NULL_TREE;
2647 /* Look for uses of the result to see whether they are COND_EXPRs that can
2648 be folded into the conditional call. */
2649 imm_use_iterator imm_iter;
2650 gimple *use_stmt;
2651 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2653 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
/* Only fold in an "else" value that is actually computable at STMT.  */
2654 if (new_else && value_available_p (stmt, ssa_names, new_else))
2656 if (!args[nops])
2657 args[nops] = new_else;
2658 if (operand_equal_p (new_else, args[nops], 0))
2660 /* We have:
2662 LHS = IFN_COND (MASK, ..., ELSE);
2663 X = MASK ? LHS : ELSE;
2665 which makes X equivalent to LHS. */
2666 tree use_lhs = gimple_assign_lhs (use_stmt);
2667 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
/* No foldable COND_EXPR use found: let the target pick the else value.  */
2671 if (!args[nops])
2672 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2673 nops - 1, &args[1]);
2675 /* Create and insert the call. */
2676 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2677 gimple_call_set_lhs (new_stmt, lhs);
2678 gimple_call_set_nothrow (new_stmt, true);
2680 return new_stmt;
2683 /* Predicate each write to memory in LOOP.
2685 This function transforms control flow constructs containing memory
2686 writes of the form:
2688 | for (i = 0; i < N; i++)
2689 | if (cond)
2690 | A[i] = expr;
2692 into the following form that does not contain control flow:
2694 | for (i = 0; i < N; i++)
2695 | A[i] = cond ? expr : A[i];
2697 The original CFG looks like this:
2699 | bb_0
2700 | i = 0
2701 | end_bb_0
2703 | bb_1
2704 | if (i < N) goto bb_5 else goto bb_2
2705 | end_bb_1
2707 | bb_2
2708 | cond = some_computation;
2709 | if (cond) goto bb_3 else goto bb_4
2710 | end_bb_2
2712 | bb_3
2713 | A[i] = expr;
2714 | goto bb_4
2715 | end_bb_3
2717 | bb_4
2718 | goto bb_1
2719 | end_bb_4
2721 insert_gimplified_predicates inserts the computation of the COND
2722 expression at the beginning of the destination basic block:
2724 | bb_0
2725 | i = 0
2726 | end_bb_0
2728 | bb_1
2729 | if (i < N) goto bb_5 else goto bb_2
2730 | end_bb_1
2732 | bb_2
2733 | cond = some_computation;
2734 | if (cond) goto bb_3 else goto bb_4
2735 | end_bb_2
2737 | bb_3
2738 | cond = some_computation;
2739 | A[i] = expr;
2740 | goto bb_4
2741 | end_bb_3
2743 | bb_4
2744 | goto bb_1
2745 | end_bb_4
2747 predicate_statements is then predicating the memory write as follows:
2749 | bb_0
2750 | i = 0
2751 | end_bb_0
2753 | bb_1
2754 | if (i < N) goto bb_5 else goto bb_2
2755 | end_bb_1
2757 | bb_2
2758 | if (cond) goto bb_3 else goto bb_4
2759 | end_bb_2
2761 | bb_3
2762 | cond = some_computation;
2763 | A[i] = cond ? expr : A[i];
2764 | goto bb_4
2765 | end_bb_3
2767 | bb_4
2768 | goto bb_1
2769 | end_bb_4
2771 and finally combine_blocks removes the basic block boundaries making
2772 the loop vectorizable:
2774 | bb_0
2775 | i = 0
2776 | if (i < N) goto bb_5 else goto bb_1
2777 | end_bb_0
2779 | bb_1
2780 | cond = some_computation;
2781 | A[i] = cond ? expr : A[i];
2782 | if (i < N) goto bb_5 else goto bb_4
2783 | end_bb_1
2785 | bb_4
2786 | goto bb_1
2787 | end_bb_4
2790 static void
2791 predicate_statements (loop_p loop)
2793 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2794 auto_vec<int, 1> vect_sizes;
2795 auto_vec<tree, 1> vect_masks;
2796 hash_set<tree_ssa_name_hash> ssa_names;
2798 for (i = 1; i < orig_loop_num_nodes; i++)
2800 gimple_stmt_iterator gsi;
2801 basic_block bb = ifc_bbs[i];
2802 tree cond = bb_predicate (bb);
2803 bool swap;
2804 int index;
/* Blocks guarded by a trivially-true predicate need no predication.  */
2806 if (is_true_predicate (cond))
2807 continue;
/* Strip a TRUTH_NOT_EXPR from the predicate and remember to swap the
   COND_EXPR arms instead.  */
2809 swap = false;
2810 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2812 swap = true;
2813 cond = TREE_OPERAND (cond, 0);
/* The masks built below are per-BB; start fresh for each block.  */
2816 vect_sizes.truncate (0);
2817 vect_masks.truncate (0);
2819 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2821 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2822 tree lhs;
2823 if (!stmt)
/* Stores under a false predicate can simply be deleted.  */
2825 else if (is_false_predicate (cond)
2826 && gimple_vdef (stmt))
2828 unlink_stmt_vdef (stmt);
2829 gsi_remove (&gsi, true);
2830 release_defs (stmt);
2831 continue;
/* GF_PLF_2 marks assignments to be rewritten to masked forms.  */
2833 else if (gimple_plf (stmt, GF_PLF_2)
2834 && is_gimple_assign (stmt))
2836 tree lhs = gimple_assign_lhs (stmt);
2837 tree mask;
2838 gimple *new_stmt;
2839 gimple_seq stmts = NULL;
2840 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2841 /* We checked before setting GF_PLF_2 that an equivalent
2842 integer mode exists. */
2843 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2844 if (!vect_sizes.is_empty ()
2845 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2846 /* Use created mask. */
2847 mask = vect_masks[index];
2848 else
2850 if (COMPARISON_CLASS_P (cond))
2851 mask = gimple_build (&stmts, TREE_CODE (cond),
2852 boolean_type_node,
2853 TREE_OPERAND (cond, 0),
2854 TREE_OPERAND (cond, 1));
2855 else
2856 mask = cond;
/* Materialize the inversion recorded in SWAP as mask ^ true.  */
2858 if (swap)
2860 tree true_val
2861 = constant_boolean_node (true, TREE_TYPE (mask));
2862 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2863 TREE_TYPE (mask), mask, true_val);
2865 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2867 /* Save mask and its size for further use. */
2868 vect_sizes.safe_push (bitsize);
2869 vect_masks.safe_push (mask);
2871 if (gimple_assign_single_p (stmt))
2872 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2873 else
2874 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2876 gsi_replace (&gsi, new_stmt, true);
/* Arithmetic with undefined signed overflow is only conditionally
   executed before the transform; rewrite it to a form with defined
   overflow so it is safe to execute unconditionally.  */
2878 else if (((lhs = gimple_assign_lhs (stmt)), true)
2879 && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2880 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2881 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
2882 && arith_code_with_undefined_signed_overflow
2883 (gimple_assign_rhs_code (stmt)))
2884 rewrite_to_defined_overflow (&gsi);
/* Remaining stores become read-modify-write: A = cond ? expr : A.  */
2885 else if (gimple_vdef (stmt))
2887 tree lhs = gimple_assign_lhs (stmt);
2888 tree rhs = gimple_assign_rhs1 (stmt);
2889 tree type = TREE_TYPE (lhs);
2891 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2892 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2893 if (swap)
2894 std::swap (lhs, rhs);
2895 cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
2896 NULL_TREE, true, GSI_SAME_STMT);
2897 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2898 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2899 update_stmt (stmt);
2902 if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
2903 && is_gimple_call (gsi_stmt (gsi)))
2905 /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
2906 This will cause the vectorizer to match the "in branch"
2907 clone variants, and serves to build the mask vector
2908 in a natural way. */
2909 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2910 tree orig_fn = gimple_call_fn (call);
2911 int orig_nargs = gimple_call_num_args (call);
2912 auto_vec<tree> args;
2913 args.safe_push (orig_fn);
2914 for (int i = 0; i < orig_nargs; i++)
2915 args.safe_push (gimple_call_arg (call, i));
2916 args.safe_push (cond);
2918 /* Replace the call with a IFN_MASK_CALL that has the extra
2919 condition parameter. */
2920 gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
2921 args);
2922 gimple_call_set_lhs (new_call, gimple_call_lhs (call));
2923 gsi_replace (&gsi, new_call, true);
/* Record SSA definitions seen so far in this block for
   value_available_p.  */
2926 lhs = gimple_get_lhs (gsi_stmt (gsi));
2927 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2928 ssa_names.add (lhs);
2929 gsi_next (&gsi);
2931 ssa_names.empty ();
2935 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs and GIMPLE_SWITCH of all
2936 the basic blocks other than the exit and latch of the LOOP. Also
2937 resets the GIMPLE_DEBUG information. */
2939 static void
2940 remove_conditions_and_labels (loop_p loop)
2942 gimple_stmt_iterator gsi;
2943 unsigned int i;
2945 for (i = 0; i < loop->num_nodes; i++)
2947 basic_block bb = ifc_bbs[i];
2949 if (bb_with_exit_edge_p (loop, bb)
2950 || bb == loop->latch)
2951 continue;
2953 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2954 switch (gimple_code (gsi_stmt (gsi)))
2956 case GIMPLE_COND:
2957 case GIMPLE_LABEL:
2958 case GIMPLE_SWITCH:
2959 gsi_remove (&gsi, true);
2960 break;
2962 case GIMPLE_DEBUG:
2963 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2964 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2966 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2967 update_stmt (gsi_stmt (gsi));
2969 gsi_next (&gsi);
2970 break;
2972 default:
2973 gsi_next (&gsi);
2978 /* Combine all the basic blocks from LOOP into one or two super basic
2979 blocks. Replace PHI nodes with conditional modify expressions.
2980 LOOP_VERSIONED should be true if we know that the loop was versioned for
2981 vectorization. */
2983 static void
2984 combine_blocks (class loop *loop, bool loop_versioned)
2986 basic_block bb, exit_bb, merge_target_bb;
2987 unsigned int orig_loop_num_nodes = loop->num_nodes;
2988 unsigned int i;
2989 edge e;
2990 edge_iterator ei;
2992 /* Reset flow-sensitive info before predicating stmts or PHIs we
2993 might fold. */
2994 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2995 for (i = 0; i < orig_loop_num_nodes; i++)
2997 bb = ifc_bbs[i];
2998 predicated[i] = is_predicated (bb);
2999 if (predicated[i])
3001 for (auto gsi = gsi_start_phis (bb);
3002 !gsi_end_p (gsi); gsi_next (&gsi))
3003 reset_flow_sensitive_info (gimple_phi_result (*gsi));
3004 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3006 gimple *stmt = gsi_stmt (gsi);
3007 ssa_op_iter i;
3008 tree op;
3009 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
3010 reset_flow_sensitive_info (op);
/* First predicate everything, then physically merge the blocks.  */
3015 remove_conditions_and_labels (loop);
3016 insert_gimplified_predicates (loop);
3017 predicate_all_scalar_phis (loop, loop_versioned);
3019 if (need_to_predicate || need_to_rewrite_undefined)
3020 predicate_statements (loop);
3022 /* Merge basic blocks. */
3023 exit_bb = single_exit (loop)->src;
3024 gcc_assert (exit_bb != loop->latch);
3025 for (i = 0; i < orig_loop_num_nodes; i++)
3027 bb = ifc_bbs[i];
3028 free_bb_predicate (bb);
3031 merge_target_bb = loop->header;
3033 /* Get at the virtual def valid for uses starting at the first block
3034 we merge into the header. Without a virtual PHI the loop has the
3035 same virtual use on all stmts. */
3036 gphi *vphi = get_virtual_phi (loop->header);
3037 tree last_vdef = NULL_TREE;
3038 if (vphi)
3040 last_vdef = gimple_phi_result (vphi);
3041 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
3042 ! gsi_end_p (gsi); gsi_next (&gsi))
3043 if (gimple_vdef (gsi_stmt (gsi)))
3044 last_vdef = gimple_vdef (gsi_stmt (gsi));
3046 for (i = 1; i < orig_loop_num_nodes; i++)
3048 gimple_stmt_iterator gsi;
3049 gimple_stmt_iterator last;
3051 bb = ifc_bbs[i];
3053 if (bb == exit_bb || bb == loop->latch)
3054 continue;
3056 /* We release virtual PHIs late because we have to propagate them
3057 out using the current VUSE. The def might be the one used
3058 after the loop. */
3059 vphi = get_virtual_phi (bb);
3060 if (vphi)
3062 /* When there's just loads inside the loop a stray virtual
3063 PHI merging the uses can appear, update last_vdef from
3064 it. */
3065 if (!last_vdef)
3066 last_vdef = gimple_phi_arg_def (vphi, 0);
3067 imm_use_iterator iter;
3068 use_operand_p use_p;
3069 gimple *use_stmt;
3070 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3072 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3073 SET_USE (use_p, last_vdef);
3075 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3076 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3077 gsi = gsi_for_stmt (vphi);
3078 remove_phi_node (&gsi, true);
3081 /* Make stmts member of loop->header and clear range info from all stmts
3082 in BB which is now no longer executed conditional on a predicate we
3083 could have derived it from. */
3084 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3086 gimple *stmt = gsi_stmt (gsi);
3087 gimple_set_bb (stmt, merge_target_bb);
3088 /* Update virtual operands. */
3089 if (last_vdef)
3091 use_operand_p use_p = ssa_vuse_operand (stmt);
3092 if (use_p
3093 && USE_FROM_PTR (use_p) != last_vdef)
3094 SET_USE (use_p, last_vdef);
3095 if (gimple_vdef (stmt))
3096 last_vdef = gimple_vdef (stmt);
3098 else
3099 /* If this is the first load we arrive at update last_vdef
3100 so we handle stray PHIs correctly. */
3101 last_vdef = gimple_vuse (stmt);
3104 /* Update stmt list. */
3105 last = gsi_last_bb (merge_target_bb);
3106 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT)
3107 set_bb_seq (bb, NULL);
3110 /* Fixup virtual operands in the exit block. */
3111 if (exit_bb
3112 && exit_bb != loop->header)
3114 /* We release virtual PHIs late because we have to propagate them
3115 out using the current VUSE. The def might be the one used
3116 after the loop. */
3117 vphi = get_virtual_phi (exit_bb);
3118 if (vphi)
3120 /* When there's just loads inside the loop a stray virtual
3121 PHI merging the uses can appear, update last_vdef from
3122 it. */
3123 if (!last_vdef)
3124 last_vdef = gimple_phi_arg_def (vphi, 0);
3125 imm_use_iterator iter;
3126 use_operand_p use_p;
3127 gimple *use_stmt;
3128 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3130 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3131 SET_USE (use_p, last_vdef);
3133 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3134 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3135 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
3136 remove_phi_node (&gsi, true);
3140 /* Now remove all the edges in the loop, except for those from the exit
3141 block and delete the blocks we elided. */
3142 for (i = 1; i < orig_loop_num_nodes; i++)
3144 bb = ifc_bbs[i];
3146 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
3148 if (e->src == exit_bb)
3149 ei_next (&ei);
3150 else
3151 remove_edge (e);
3154 for (i = 1; i < orig_loop_num_nodes; i++)
3156 bb = ifc_bbs[i];
3158 if (bb == exit_bb || bb == loop->latch)
3159 continue;
3161 delete_basic_block (bb);
3164 /* Re-connect the exit block. */
3165 if (exit_bb != NULL)
3167 if (exit_bb != loop->header)
3169 /* Connect this node to loop header. */
3170 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
3171 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
3174 /* Redirect non-exit edges to loop->latch. */
3175 FOR_EACH_EDGE (e, ei, exit_bb->succs)
3177 if (!loop_exit_edge_p (loop, e))
3178 redirect_edge_and_branch (e, loop->latch);
3180 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
3182 else
3184 /* If the loop does not have an exit, reconnect header and latch. */
3185 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
3186 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
3189 /* If possible, merge loop header to the block with the exit edge.
3190 This reduces the number of basic blocks to two, to please the
3191 vectorizer that handles only loops with two nodes. */
3192 if (exit_bb
3193 && exit_bb != loop->header)
3195 if (can_merge_blocks_p (loop->header, exit_bb))
3196 merge_blocks (loop->header, exit_bb);
3199 free (ifc_bbs);
3200 ifc_bbs = NULL;
3201 free (predicated);
3204 /* Version LOOP before if-converting it; the original loop
3205 will be if-converted, the new copy of the loop will not,
3206 and the LOOP_VECTORIZED internal call will be guarding which
3207 loop to execute. The vectorizer pass will fold this
3208 internal call into either true or false.
3210 Note that this function intentionally invalidates profile. Both edges
3211 out of LOOP_VECTORIZED must have 100% probability so the profile remains
3212 consistent after the condition is folded in the vectorizer. */
3214 static class loop *
3215 version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
3217 basic_block cond_bb;
3218 tree cond = make_ssa_name (boolean_type_node);
3219 class loop *new_loop;
3220 gimple *g;
3221 gimple_stmt_iterator gsi;
3222 unsigned int save_length = 0;
/* Build cond = IFN_LOOP_VECTORIZED (loop->num, <new loop num>); the
   second argument is patched in below once the copy exists.  */
3224 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
3225 build_int_cst (integer_type_node, loop->num),
3226 integer_zero_node);
3227 gimple_call_set_lhs (g, cond);
3229 void **saved_preds = NULL;
3230 if (any_complicated_phi || need_to_predicate)
3232 /* Save BB->aux around loop_version as that uses the same field. */
3233 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
3234 saved_preds = XALLOCAVEC (void *, save_length);
3235 for (unsigned i = 0; i < save_length; i++)
3236 saved_preds[i] = ifc_bbs[i]->aux;
3239 initialize_original_copy_tables ();
3240 /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
3241 is re-merged in the vectorizer. */
3242 new_loop = loop_version (loop, cond, &cond_bb,
3243 profile_probability::always (),
3244 profile_probability::always (),
3245 profile_probability::always (),
3246 profile_probability::always (), true);
3247 free_original_copy_tables ();
/* Restore the predicates stashed in BB->aux.  */
3249 if (any_complicated_phi || need_to_predicate)
3250 for (unsigned i = 0; i < save_length; i++)
3251 ifc_bbs[i]->aux = saved_preds[i];
3253 if (new_loop == NULL)
3254 return NULL;
/* The copy must never be vectorized; only the original is.  */
3256 new_loop->dont_vectorize = true;
3257 new_loop->force_vectorize = false;
3258 gsi = gsi_last_bb (cond_bb);
3259 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
3260 if (preds)
3261 preds->safe_push (g);
3262 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3263 update_ssa (TODO_update_ssa_no_phi);
3264 return new_loop;
3267 /* Return true when LOOP satisfies the follow conditions that will
3268 allow it to be recognized by the vectorizer for outer-loop
3269 vectorization:
3270 - The loop is not the root node of the loop tree.
3271 - The loop has exactly one inner loop.
3272 - The loop has a single exit.
3273 - The loop header has a single successor, which is the inner
3274 loop header.
3275 - Each of the inner and outer loop latches have a single
3276 predecessor.
3277 - The loop exit block has a single predecessor, which is the
3278 inner loop's exit block. */
3280 static bool
3281 versionable_outer_loop_p (class loop *loop)
3283 if (!loop_outer (loop)
3284 || loop->dont_vectorize
3285 || !loop->inner
3286 || loop->inner->next
3287 || !single_exit (loop)
3288 || !single_succ_p (loop->header)
3289 || single_succ (loop->header) != loop->inner->header
3290 || !single_pred_p (loop->latch)
3291 || !single_pred_p (loop->inner->latch))
3292 return false;
3294 basic_block outer_exit = single_pred (loop->latch);
3295 basic_block inner_exit = single_pred (loop->inner->latch);
3297 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3298 return false;
3300 if (dump_file)
3301 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3303 return true;
3306 /* Performs splitting of critical edges. Skip splitting and return false
3307 if LOOP will not be converted because:
3309 - LOOP is not well formed.
3310 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3312 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3314 static bool
3315 ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3317 basic_block *body;
3318 basic_block bb;
3319 unsigned int num = loop->num_nodes;
3320 unsigned int i;
3321 edge e;
3322 edge_iterator ei;
3323 auto_vec<edge> critical_edges;
3325 /* Loop is not well formed. */
3326 if (loop->inner)
3327 return false;
3329 body = get_loop_body (loop);
3330 for (i = 0; i < num; i++)
3332 bb = body[i];
3333 if (!aggressive_if_conv
3334 && phi_nodes (bb)
3335 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3337 if (dump_file && (dump_flags & TDF_DETAILS))
3338 fprintf (dump_file,
3339 "BB %d has complicated PHI with more than %u args.\n",
3340 bb->index, MAX_PHI_ARG_NUM);
3342 free (body);
3343 return false;
3345 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3346 continue;
3348 /* Skip basic blocks not ending with conditional branch. */
3349 if (!safe_is_a <gcond *> (*gsi_last_bb (bb))
3350 && !safe_is_a <gswitch *> (*gsi_last_bb (bb)))
3351 continue;
3353 FOR_EACH_EDGE (e, ei, bb->succs)
3354 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3355 critical_edges.safe_push (e);
3357 free (body);
3359 while (critical_edges.length () > 0)
3361 e = critical_edges.pop ();
3362 /* Don't split if bb can be predicated along non-critical edge. */
3363 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3364 split_edge (e);
3367 return true;
/* Delete redundant statements produced by predication which prevents
   loop vectorization.  Runs after combine_blocks, so LOOP consists of a
   single basic block; performs a mark-and-sweep DCE over that block
   using the GF_PLF_2 statement flag as the "live" mark, plus a DSE pass
   over the remaining stores.  */

static void
ifcvt_local_dce (class loop *loop)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* The loop has a single BB only.  */
  basic_block bb = loop->header;
  /* Virtual def flowing around the latch; used below to classify stores
     that are dead within one iteration.  */
  tree latch_vdef = NULL_TREE;

  worklist.create (64);
  /* Consider all phi as live statements.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      gimple_set_plf (phi, GF_PLF_2, true);
      worklist.safe_push (phi);
      if (virtual_operand_p (gimple_phi_result (phi)))
	latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	{
	  /* Debug stmts are kept but need not seed liveness propagation.  */
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  continue;
	}
      if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      code = gimple_code (stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL || code == GIMPLE_SWITCH)
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      /* Everything else starts out dead ...  */
      gimple_set_plf (stmt, GF_PLF_2, false);

      /* ... unless its result is used outside of BB, i.e. outside of
	 the loop.  */
      if (code == GIMPLE_ASSIGN)
	{
	  tree lhs = gimple_assign_lhs (stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      stmt1 = USE_STMT (use_p);
	      if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
		{
		  gimple_set_plf (stmt, GF_PLF_2, true);
		  worklist.safe_push (stmt);
		  break;
		}
	    }
	}
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) != SSA_NAME)
	    continue;
	  stmt1 = SSA_NAME_DEF_STMT (use);
	  /* Only definitions inside BB and not yet marked need work.  */
	  if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
	    continue;
	  gimple_set_plf (stmt1, GF_PLF_2, true);
	  worklist.safe_push (stmt1);
	}
    }
  /* Delete dead statements.  Walk backwards so removing a stmt does not
     invalidate the iterator position we continue from.  */
  gsi = gsi_last_bb (bb);
  while (!gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsiprev = gsi;
      gsi_prev (&gsiprev);
      stmt = gsi_stmt (gsi);
      if (!gimple_has_volatile_ops (stmt)
	  && gimple_store_p (stmt)
	  && gimple_vdef (stmt))
	{
	  /* Stores were marked live above; use DSE to remove those that
	     are provably overwritten before being read.  */
	  tree lhs = gimple_get_lhs (stmt);
	  ao_ref write;
	  ao_ref_init (&write, lhs);

	  if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
	      == DSE_STORE_DEAD)
	    delete_dead_or_redundant_assignment (&gsi, "dead");
	  gsi = gsiprev;
	  continue;
	}

      if (gimple_plf (stmt, GF_PLF_2))
	{
	  gsi = gsiprev;
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      gsi_remove (&gsi, true);
      release_defs (stmt);
      gsi = gsiprev;
    }
}
3496 /* Return true if VALUE is already available on edge PE. */
3498 static bool
3499 ifcvt_available_on_edge_p (edge pe, tree value)
3501 if (is_gimple_min_invariant (value))
3502 return true;
3504 if (TREE_CODE (value) == SSA_NAME)
3506 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3507 if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3508 return true;
3511 return false;
3514 /* Return true if STMT can be hoisted from if-converted loop LOOP to
3515 edge PE. */
3517 static bool
3518 ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3520 if (auto *call = dyn_cast<gcall *> (stmt))
3522 if (gimple_call_internal_p (call)
3523 && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3524 return false;
3526 else if (auto *assign = dyn_cast<gassign *> (stmt))
3528 if (gimple_assign_rhs_code (assign) == COND_EXPR)
3529 return false;
3531 else
3532 return false;
3534 if (gimple_has_side_effects (stmt)
3535 || gimple_could_trap_p (stmt)
3536 || stmt_could_throw_p (cfun, stmt)
3537 || gimple_vdef (stmt)
3538 || gimple_vuse (stmt))
3539 return false;
3541 int num_args = gimple_num_args (stmt);
3542 if (pe != loop_preheader_edge (loop))
3544 for (int i = 0; i < num_args; ++i)
3545 if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3546 return false;
3548 else
3550 for (int i = 0; i < num_args; ++i)
3551 if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3552 return false;
3555 return true;
/* Hoist invariant statements from LOOP to edge PE.  */

static void
ifcvt_hoist_invariants (class loop *loop, edge pe)
{
  /* Only hoist from the now unconditionally executed part of the loop.  */
  basic_block bb = loop->header;
  /* Insertion point among already-hoisted statements; NULL ptr until the
     first statement has been placed on PE.  */
  gimple_stmt_iterator hoist_gsi = {};
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      if (ifcvt_can_hoist (loop, pe, stmt))
	{
	  /* Once we've hoisted one statement, insert other statements
	     after it.  */
	  /* gsi_remove advances GSI to the following statement, so no
	     gsi_next on this path.  */
	  gsi_remove (&gsi, false);
	  if (hoist_gsi.ptr)
	    gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
	  else
	    {
	      gsi_insert_on_edge_immediate (pe, stmt);
	      hoist_gsi = gsi_for_stmt (stmt);
	    }
	  continue;
	}
      gsi_next (&gsi);
    }
}
/* Returns the bitfield representative (DECL_BIT_FIELD_REPRESENTATIVE) of
   the bitfield access in STMT iff its type mode is not BLKmode.  WRITE
   selects whether the access is the LHS (store) or RHS1 (load) of STMT.
   If BITPOS is not NULL it will hold the tree expression for the bit
   offset of the bitfield within the representative, and STRUCT_EXPR, if
   not NULL, will hold the tree representing the base struct of this
   bitfield.  Returns NULL_TREE when the access cannot be lowered.  */

static tree
get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
		  tree *struct_expr)
{
  tree comp_ref = write ? gimple_assign_lhs (stmt)
			: gimple_assign_rhs1 (stmt);

  tree field_decl = TREE_OPERAND (comp_ref, 1);
  tree ref_offset = component_ref_field_offset (comp_ref);
  tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);

  /* Bail out if the representative is not a suitable type for a scalar
     register variable.  */
  if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
    return NULL_TREE;

  /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
     precision.  */
  unsigned HOST_WIDE_INT bf_prec
    = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
  if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
    return NULL_TREE;

  /* The offset arithmetic below requires both offsets to be constant.  */
  if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST
      || TREE_CODE (ref_offset) != INTEGER_CST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\t Bitfield NOT OK to lower,"
			    " offset is non-constant.\n");
      return NULL_TREE;
    }

  if (struct_expr)
    *struct_expr = TREE_OPERAND (comp_ref, 0);

  if (bitpos)
    {
      /* To calculate the bitposition of the BITFIELD_REF we have to determine
	 where our bitfield starts in relation to the container REP_DECL. The
	 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
	 us how many bytes from the start of the structure there are until the
	 start of the group of bitfield members the FIELD_DECL belongs to,
	 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
	 position our actual bitfield member starts.  For the container
	 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
	 us the distance between the start of the structure and the start of
	 the container, though the first is in bytes and the later other in
	 bits.  With this in mind we calculate the bit position of our new
	 BITFIELD_REF by subtracting the number of bits between the start of
	 the structure and the container from the number of bits from the start
	 of the structure and the actual bitfield member. */
      tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
				 ref_offset,
				 build_int_cst (bitsizetype, BITS_PER_UNIT));
      bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
			    DECL_FIELD_BIT_OFFSET (field_decl));
      tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
				  DECL_FIELD_OFFSET (rep_decl),
				  build_int_cst (bitsizetype, BITS_PER_UNIT));
      rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
			     DECL_FIELD_BIT_OFFSET (rep_decl));

      *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
    }

  return rep_decl;
}
/* Lowers the bitfield access in STMT (a write when WRITE is true).
   For a write like:

     struct.bf = _1;

   lower to:

     __ifc_1 = struct.<representative>;
     __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
     struct.<representative> = __ifc_2;

   For a read:

     _1 = struct.bf;

   lower to:

     __ifc_1 = struct.<representative>;
     _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);

   where representative is a legal load that contains the bitfield value,
   bitsize is the size of the bitfield and bitpos the offset to the start of
   the bitfield within the representative.  */

static void
lower_bitfield (gassign *stmt, bool write)
{
  tree struct_expr;
  tree bitpos;
  /* Callers have already validated STMT via get_bitfield_rep, so this
     lookup is expected to succeed.  */
  tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
  tree rep_type = TREE_TYPE (rep_decl);
  tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Lowering:\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, "to:\n");
    }

  /* REP_COMP_REF is a COMPONENT_REF for the representative.  NEW_VAL is its
     defining SSA_NAME.  */
  tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
			      NULL_TREE);
  tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);

  if (dump_file && (dump_flags & TDF_DETAILS))
    print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

  if (write)
    {
      /* Insert the original RHS value into the loaded representative,
	 then store the whole representative back.  */
      new_val = ifc_temp_var (rep_type,
			      build3 (BIT_INSERT_EXPR, rep_type, new_val,
				      unshare_expr (gimple_assign_rhs1 (stmt)),
				      bitpos), &gsi);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

      gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
					      new_val);
      /* The new store takes over the virtual operands of STMT.  */
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
  else
    {
      /* Extract the bitfield's bits from the loaded representative.  */
      tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
			 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
			 bitpos);
      new_val = ifc_temp_var (bf_type, bfr, &gsi);

      gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
					      new_val);
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }

  /* Remove the original bitfield access.  */
  gsi_remove (&gsi, true);
}
3749 /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3750 with data structures representing these bitfields. */
3752 static bool
3753 bitfields_to_lower_p (class loop *loop,
3754 vec <gassign *> &reads_to_lower,
3755 vec <gassign *> &writes_to_lower)
3757 gimple_stmt_iterator gsi;
3759 if (dump_file && (dump_flags & TDF_DETAILS))
3761 fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
3764 for (unsigned i = 0; i < loop->num_nodes; ++i)
3766 basic_block bb = ifc_bbs[i];
3767 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3769 gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
3770 if (!stmt)
3771 continue;
3773 tree op = gimple_assign_lhs (stmt);
3774 bool write = TREE_CODE (op) == COMPONENT_REF;
3776 if (!write)
3777 op = gimple_assign_rhs1 (stmt);
3779 if (TREE_CODE (op) != COMPONENT_REF)
3780 continue;
3782 if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3785 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3787 if (TREE_THIS_VOLATILE (op))
3789 if (dump_file && (dump_flags & TDF_DETAILS))
3790 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3791 " the access is volatile.\n");
3792 return false;
3795 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
3797 if (dump_file && (dump_flags & TDF_DETAILS))
3798 fprintf (dump_file, "\t Bitfield NO OK to lower,"
3799 " field type is not Integral.\n");
3800 return false;
3803 if (!get_bitfield_rep (stmt, write, NULL, NULL))
3805 if (dump_file && (dump_flags & TDF_DETAILS))
3806 fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3807 " representative is BLKmode.\n");
3808 return false;
3811 if (dump_file && (dump_flags & TDF_DETAILS))
3812 fprintf (dump_file, "\tBitfield OK to lower.\n");
3813 if (write)
3814 writes_to_lower.safe_push (stmt);
3815 else
3816 reads_to_lower.safe_push (stmt);
3820 return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  */

unsigned int
tree_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  /* When the outer loop is versioned, RLOOP is the inner loop of the
     scalar copy that still needs if-conversion; we loop back to AGAIN
     for it.  */
  class loop *rloop;
  auto_vec <gassign *, 4> reads_to_lower;
  auto_vec <gassign *, 4> writes_to_lower;
  bitmap exit_bbs;
  /* The edge invariant statements get hoisted to.  */
  edge pe;
  auto_vec<data_reference_p, 10> refs;
  bool loop_versioned;

 again:
  /* Reset the per-loop analysis globals; this function can iterate via
     the RLOOP mechanism below.  */
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_lower_bitfields = false;
  need_to_ifcvt = false;
  need_to_predicate = false;
  need_to_rewrite_undefined = false;
  any_complicated_phi = false;
  loop_versioned = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      class loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  /* If there are more than two BBs in the loop then there is at least one if
     to convert.  */
  if (loop->num_nodes > 2
      && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      goto cleanup;
    }

  if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
    goto cleanup;

  if (loop->num_nodes > 2)
    {
      /* More than one loop exit is too much to handle.  */
      if (!single_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
	}
      else
	{
	  need_to_ifcvt = true;

	  if (!if_convertible_loop_p (loop, &refs)
	      || !dbg_cnt (if_conversion_tree))
	    goto cleanup;

	  /* Predication / complicated PHIs are only acceptable when we
	     will version for the vectorizer.  */
	  if ((need_to_predicate || any_complicated_phi)
	      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
		  || loop->dont_vectorize))
	    goto cleanup;
	}
    }

  /* Bitfield lowering only happens in service of vectorization.  */
  if ((flag_tree_loop_vectorize || loop->force_vectorize)
      && !loop->dont_vectorize)
    need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
						    writes_to_lower);

  if (!need_to_ifcvt && !need_to_lower_bitfields)
    goto cleanup;

  /* The edge to insert invariant stmts on.  */
  pe = loop_preheader_edge (loop);

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_lower_bitfields
      || need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      class loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      class loop *nloop = version_loop_for_if_conversion (vloop, preds);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
      else
	/* If we versioned loop then make sure to insert invariant
	   stmts before the .LOOP_VECTORIZED check since the vectorizer
	   will re-use that for things like runtime alias versioning
	   whose condition can end up using those invariants.  */
	pe = single_pred_edge (gimple_bb (preds->last ()));

      loop_versioned = true;
    }

  if (need_to_lower_bitfields)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "-------------------------\n");
	  fprintf (dump_file, "Start lowering bitfields\n");
	}
      while (!reads_to_lower.is_empty ())
	lower_bitfield (reads_to_lower.pop (), false);
      while (!writes_to_lower.is_empty ())
	lower_bitfield (writes_to_lower.pop (), true);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Done lowering bitfields\n");
	  fprintf (dump_file, "-------------------------\n");
	}
    }
  if (need_to_ifcvt)
    {
      /* Before we rewrite edges we'll record their original position in the
	 edge map such that we can map the edges between the ifcvt and the
	 non-ifcvt loop during peeling.  */
      uintptr_t idx = 0;
      for (edge exit : get_loop_exit_edges (loop))
	exit->aux = (void *)idx++;

      /* Now all statements are if-convertible.  Combine all the basic
	 blocks into one huge basic block doing the if-conversion
	 on-the-fly.  */
      combine_blocks (loop, loop_versioned);
    }

  std::pair <tree, tree> *name_pair;
  unsigned ssa_names_idx;
  FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
     and stores are involved.  CSE only the loop body, not the entry
     PHIs, those are to be kept in sync with the non-if-converted copy.
     ???  We'll still keep dead stores though.  */
  exit_bbs = BITMAP_ALLOC (NULL);
  for (edge exit : get_loop_exit_edges (loop))
    bitmap_set_bit (exit_bbs, exit->dest->index);
  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs,
		     false, true, true);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop);
  BITMAP_FREE (exit_bbs);

  ifcvt_hoist_invariants (loop, pe);

  todo |= TODO_cleanup_cfg;

 cleanup:
  /* Free the data-reference analysis results and per-BB predicates
     regardless of which path got us here.  */
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    {
      free (dr->aux);
      free_data_ref (dr);
    }
  refs.truncate (0);

  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  if (rloop != NULL)
    {
      /* Process the inner loop of the scalar copy created by outer-loop
	 versioning.  */
      loop = rloop;
      reads_to_lower.truncate (0);
      writes_to_lower.truncate (0);
      goto again;
    }

  return todo;
}
/* Tree if-conversion pass management.  */

namespace {

/* Static metadata for the "ifcvt" GIMPLE pass; consumed by the pass
   manager via pass_if_conversion below.  */
const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* Pass wrapper running tree_if_conversion on each candidate loop;
   gate and execute are defined out of line below.  */
class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override;

}; // class pass_if_conversion
4079 bool
4080 pass_if_conversion::gate (function *fun)
4082 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
4083 && flag_tree_loop_if_convert != 0)
4084 || flag_tree_loop_if_convert == 1);
/* Run if-conversion on every loop of FUN that is either explicitly
   requested (-ftree-loop-if-convert) or a vectorization candidate, then
   clean up the IFN_LOOP_VECTORIZED guards whose if-converted loop did
   not survive IL cleanup.  */

unsigned int
pass_if_conversion::execute (function *fun)
{
  unsigned todo = 0;

  /* Loop 0 is the function body itself; nothing to do without real loops.  */
  if (number_of_loops (fun) <= 1)
    return 0;

  /* PREDS collects the IFN_LOOP_VECTORIZED calls created by versioning.  */
  auto_vec<gimple *> preds;
  for (auto loop : loops_list (cfun, 0))
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop, &preds);

  if (todo)
    {
      /* Loop bodies changed; cached niter estimates and SCEV data are
	 stale.  */
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  if (flag_checking)
    {
      /* tree_if_conversion must not leak BB->aux values.  */
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  /* Perform IL update now, it might elide some loops.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      if (need_ssa_update_p (fun))
	todo |= TODO_update_ssa;
    }
  if (todo & TODO_update_ssa_any)
    update_ssa (todo & TODO_update_ssa_any);

  /* If if-conversion elided the loop fall back to the original one.  Likewise
     if the loops are not nested in the same outer loop.  */
  for (unsigned i = 0; i < preds.length (); ++i)
    {
      gimple *g = preds[i];
      /* The guard itself may have been removed by cleanup.  */
      if (!gimple_bb (g))
	continue;
      /* Arg 0 is the if-converted loop's number, arg 1 the scalar
	 copy's (see version_loop_for_if_conversion).  */
      auto ifcvt_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 0)));
      auto orig_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 1)));
      if (!ifcvt_loop || !orig_loop)
	{
	  if (dump_file)
	    fprintf (dump_file, "If-converted loop vanished\n");
	  fold_loop_internal_call (g, boolean_false_node);
	}
      else if (loop_outer (ifcvt_loop) != loop_outer (orig_loop))
	{
	  if (dump_file)
	    fprintf (dump_file, "If-converted loop in different outer loop\n");
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }

  return 0;
}
4151 } // anon namespace
4153 gimple_opt_pass *
4154 make_pass_if_conversion (gcc::context *ctxt)
4156 return new pass_if_conversion (ctxt);