Relocation (= move+destroy)
[official-gcc.git] / gcc / tree-if-conv.c
blob52aa5756c94d2db0c5c8f2d62cf1892c79ed43ef
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "params.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
122 /* Only handle PHIs with no more arguments unless we are asked to by
123 simd pragma. */
124 #define MAX_PHI_ARG_NUM \
125 ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
127 /* True if we've converted a statement that was only executed when some
128 condition C was true, and if for correctness we need to predicate the
129 statement to ensure that it is a no-op when C is false. See
130 predicate_statements for the kinds of predication we support. */
131 static bool need_to_predicate;
133 /* Indicate if there are any complicated PHIs that need to be handled in
134 if-conversion. Complicated PHI has more than two arguments and can't
135 be degenerated to two arguments PHI. See more information in comment
136 before phi_convertible_by_degenerating_args. */
137 static bool any_complicated_phi;
139 /* Hash for struct innermost_loop_behavior. It depends on the user to
140 free the memory. */
142 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
144 static inline hashval_t hash (const value_type &);
145 static inline bool equal (const value_type &,
146 const compare_type &);
149 inline hashval_t
150 innermost_loop_behavior_hash::hash (const value_type &e)
152 hashval_t hash;
154 hash = iterative_hash_expr (e->base_address, 0);
155 hash = iterative_hash_expr (e->offset, hash);
156 hash = iterative_hash_expr (e->init, hash);
157 return iterative_hash_expr (e->step, hash);
160 inline bool
161 innermost_loop_behavior_hash::equal (const value_type &e1,
162 const compare_type &e2)
164 if ((e1->base_address && !e2->base_address)
165 || (!e1->base_address && e2->base_address)
166 || (!e1->offset && e2->offset)
167 || (e1->offset && !e2->offset)
168 || (!e1->init && e2->init)
169 || (e1->init && !e2->init)
170 || (!e1->step && e2->step)
171 || (e1->step && !e2->step))
172 return false;
174 if (e1->base_address && e2->base_address
175 && !operand_equal_p (e1->base_address, e2->base_address, 0))
176 return false;
177 if (e1->offset && e2->offset
178 && !operand_equal_p (e1->offset, e2->offset, 0))
179 return false;
180 if (e1->init && e2->init
181 && !operand_equal_p (e1->init, e2->init, 0))
182 return false;
183 if (e1->step && e2->step
184 && !operand_equal_p (e1->step, e2->step, 0))
185 return false;
187 return true;
190 /* List of basic blocks in if-conversion-suitable order. */
191 static basic_block *ifc_bbs;
193 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
194 static hash_map<innermost_loop_behavior_hash,
195 data_reference_p> *innermost_DR_map;
197 /* Hash table to store <base reference, DR> pairs. */
198 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
200 /* List of redundant SSA names: the first should be replaced by the second. */
201 static vec< std::pair<tree, tree> > redundant_ssa_names;
203 /* Structure used to predicate basic blocks. This is attached to the
204 ->aux field of the BBs in the loop to be if-converted. */
205 struct bb_predicate {
207 /* The condition under which this basic block is executed. */
208 tree predicate;
210 /* PREDICATE is gimplified, and the sequence of statements is
211 recorded here, in order to avoid the duplication of computations
212 that occur in previous conditions. See PR44483. */
213 gimple_seq predicate_gimplified_stmts;
216 /* Returns true when the basic block BB has a predicate. */
218 static inline bool
219 bb_has_predicate (basic_block bb)
221 return bb->aux != NULL;
224 /* Returns the gimplified predicate for basic block BB. */
226 static inline tree
227 bb_predicate (basic_block bb)
229 return ((struct bb_predicate *) bb->aux)->predicate;
232 /* Sets the gimplified predicate COND for basic block BB. */
234 static inline void
235 set_bb_predicate (basic_block bb, tree cond)
237 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
238 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
239 || is_gimple_condexpr (cond));
240 ((struct bb_predicate *) bb->aux)->predicate = cond;
243 /* Returns the sequence of statements of the gimplification of the
244 predicate for basic block BB. */
246 static inline gimple_seq
247 bb_predicate_gimplified_stmts (basic_block bb)
249 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
252 /* Sets the sequence of statements STMTS of the gimplification of the
253 predicate for basic block BB. */
255 static inline void
256 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
258 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
261 /* Adds the sequence of statements STMTS to the sequence of statements
262 of the predicate for basic block BB. */
264 static inline void
265 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
267 /* We might have updated some stmts in STMTS via force_gimple_operand
268 calling fold_stmt and that producing multiple stmts. Delink immediate
269 uses so update_ssa after loop versioning doesn't get confused for
270 the not yet inserted predicates.
271 ??? This should go away once we reliably avoid updating stmts
272 not in any BB. */
273 for (gimple_stmt_iterator gsi = gsi_start (stmts);
274 !gsi_end_p (gsi); gsi_next (&gsi))
276 gimple *stmt = gsi_stmt (gsi);
277 delink_stmt_imm_use (stmt);
278 gimple_set_modified (stmt, true);
280 gimple_seq_add_seq_without_update
281 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
284 /* Initializes to TRUE the predicate of basic block BB. */
286 static inline void
287 init_bb_predicate (basic_block bb)
289 bb->aux = XNEW (struct bb_predicate);
290 set_bb_predicate_gimplified_stmts (bb, NULL);
291 set_bb_predicate (bb, boolean_true_node);
294 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
296 static inline void
297 release_bb_predicate (basic_block bb)
299 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
300 if (stmts)
302 /* Ensure that these stmts haven't yet been added to a bb. */
303 if (flag_checking)
304 for (gimple_stmt_iterator i = gsi_start (stmts);
305 !gsi_end_p (i); gsi_next (&i))
306 gcc_assert (! gimple_bb (gsi_stmt (i)));
308 /* Discard them. */
309 gimple_seq_discard (stmts);
310 set_bb_predicate_gimplified_stmts (bb, NULL);
314 /* Free the predicate of basic block BB. */
316 static inline void
317 free_bb_predicate (basic_block bb)
319 if (!bb_has_predicate (bb))
320 return;
322 release_bb_predicate (bb);
323 free (bb->aux);
324 bb->aux = NULL;
327 /* Reinitialize predicate of BB with the true predicate. */
329 static inline void
330 reset_bb_predicate (basic_block bb)
332 if (!bb_has_predicate (bb))
333 init_bb_predicate (bb);
334 else
336 release_bb_predicate (bb);
337 set_bb_predicate (bb, boolean_true_node);
341 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
342 the expression EXPR. Inserts the statement created for this
343 computation before GSI and leaves the iterator GSI at the same
344 statement. */
346 static tree
347 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
349 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
350 gimple *stmt = gimple_build_assign (new_name, expr);
351 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
352 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
353 return new_name;
356 /* Return true when COND is a false predicate. */
358 static inline bool
359 is_false_predicate (tree cond)
361 return (cond != NULL_TREE
362 && (cond == boolean_false_node
363 || integer_zerop (cond)));
366 /* Return true when COND is a true predicate. */
368 static inline bool
369 is_true_predicate (tree cond)
371 return (cond == NULL_TREE
372 || cond == boolean_true_node
373 || integer_onep (cond));
376 /* Returns true when BB has a predicate that is not trivial: true or
377 NULL_TREE. */
379 static inline bool
380 is_predicated (basic_block bb)
382 return !is_true_predicate (bb_predicate (bb));
385 /* Parses the predicate COND and returns its comparison code and
386 operands OP0 and OP1. */
388 static enum tree_code
389 parse_predicate (tree cond, tree *op0, tree *op1)
391 gimple *s;
393 if (TREE_CODE (cond) == SSA_NAME
394 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
396 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
398 *op0 = gimple_assign_rhs1 (s);
399 *op1 = gimple_assign_rhs2 (s);
400 return gimple_assign_rhs_code (s);
403 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
405 tree op = gimple_assign_rhs1 (s);
406 tree type = TREE_TYPE (op);
407 enum tree_code code = parse_predicate (op, op0, op1);
409 return code == ERROR_MARK ? ERROR_MARK
410 : invert_tree_comparison (code, HONOR_NANS (type));
413 return ERROR_MARK;
416 if (COMPARISON_CLASS_P (cond))
418 *op0 = TREE_OPERAND (cond, 0);
419 *op1 = TREE_OPERAND (cond, 1);
420 return TREE_CODE (cond);
423 return ERROR_MARK;
426 /* Returns the fold of predicate C1 OR C2 at location LOC. */
428 static tree
429 fold_or_predicates (location_t loc, tree c1, tree c2)
431 tree op1a, op1b, op2a, op2b;
432 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
433 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
435 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
437 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
438 code2, op2a, op2b);
439 if (t)
440 return t;
443 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
446 /* Returns either a COND_EXPR or the folded expression if the folded
447 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
448 a constant or a SSA_NAME. */
450 static tree
451 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
453 tree rhs1, lhs1, cond_expr;
455 /* If COND is comparison r != 0 and r has boolean type, convert COND
456 to SSA_NAME to accept by vect bool pattern. */
457 if (TREE_CODE (cond) == NE_EXPR)
459 tree op0 = TREE_OPERAND (cond, 0);
460 tree op1 = TREE_OPERAND (cond, 1);
461 if (TREE_CODE (op0) == SSA_NAME
462 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
463 && (integer_zerop (op1)))
464 cond = op0;
466 cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
468 if (cond_expr == NULL_TREE)
469 return build3 (COND_EXPR, type, cond, rhs, lhs);
471 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
473 if (is_gimple_val (cond_expr))
474 return cond_expr;
476 if (TREE_CODE (cond_expr) == ABS_EXPR)
478 rhs1 = TREE_OPERAND (cond_expr, 1);
479 STRIP_USELESS_TYPE_CONVERSION (rhs1);
480 if (is_gimple_val (rhs1))
481 return build1 (ABS_EXPR, type, rhs1);
484 if (TREE_CODE (cond_expr) == MIN_EXPR
485 || TREE_CODE (cond_expr) == MAX_EXPR)
487 lhs1 = TREE_OPERAND (cond_expr, 0);
488 STRIP_USELESS_TYPE_CONVERSION (lhs1);
489 rhs1 = TREE_OPERAND (cond_expr, 1);
490 STRIP_USELESS_TYPE_CONVERSION (rhs1);
491 if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
492 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
494 return build3 (COND_EXPR, type, cond, rhs, lhs);
497 /* Add condition NC to the predicate list of basic block BB. LOOP is
498 the loop to be if-converted. Use predicate of cd-equivalent block
499 for join bb if it exists: we call basic blocks bb1 and bb2
500 cd-equivalent if they are executed under the same condition. */
502 static inline void
503 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
505 tree bc, *tp;
506 basic_block dom_bb;
508 if (is_true_predicate (nc))
509 return;
511 /* If dominance tells us this basic block is always executed,
512 don't record any predicates for it. */
513 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
514 return;
516 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
517 /* We use notion of cd equivalence to get simpler predicate for
518 join block, e.g. if join block has 2 predecessors with predicates
519 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
520 p1 & p2 | p1 & !p2. */
521 if (dom_bb != loop->header
522 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
524 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
525 bc = bb_predicate (dom_bb);
526 if (!is_true_predicate (bc))
527 set_bb_predicate (bb, bc);
528 else
529 gcc_assert (is_true_predicate (bb_predicate (bb)));
530 if (dump_file && (dump_flags & TDF_DETAILS))
531 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
532 dom_bb->index, bb->index);
533 return;
536 if (!is_predicated (bb))
537 bc = nc;
538 else
540 bc = bb_predicate (bb);
541 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
542 if (is_true_predicate (bc))
544 reset_bb_predicate (bb);
545 return;
549 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
550 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
551 tp = &TREE_OPERAND (bc, 0);
552 else
553 tp = &bc;
554 if (!is_gimple_condexpr (*tp))
556 gimple_seq stmts;
557 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
558 add_bb_predicate_gimplified_stmts (bb, stmts);
560 set_bb_predicate (bb, bc);
563 /* Add the condition COND to the previous condition PREV_COND, and add
564 this to the predicate list of the destination of edge E. LOOP is
565 the loop to be if-converted. */
567 static void
568 add_to_dst_predicate_list (struct loop *loop, edge e,
569 tree prev_cond, tree cond)
571 if (!flow_bb_inside_loop_p (loop, e->dest))
572 return;
574 if (!is_true_predicate (prev_cond))
575 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
576 prev_cond, cond);
578 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
579 add_to_predicate_list (loop, e->dest, cond);
582 /* Return true if one of the successor edges of BB exits LOOP. */
584 static bool
585 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
587 edge e;
588 edge_iterator ei;
590 FOR_EACH_EDGE (e, ei, bb->succs)
591 if (loop_exit_edge_p (loop, e))
592 return true;
594 return false;
597 /* Given PHI which has more than two arguments, this function checks if
598 it's if-convertible by degenerating its arguments. Specifically, if
599 below two conditions are satisfied:
601 1) Number of PHI arguments with different values equals to 2 and one
602 argument has the only occurrence.
603 2) The edge corresponding to the unique argument isn't critical edge.
605 Such PHI can be handled as PHIs have only two arguments. For example,
606 below PHI:
608 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
610 can be transformed into:
612 res = (predicate of e3) ? A_2 : A_1;
614 Return TRUE if it is the case, FALSE otherwise. */
616 static bool
617 phi_convertible_by_degenerating_args (gphi *phi)
619 edge e;
620 tree arg, t1 = NULL, t2 = NULL;
621 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
622 unsigned int num_args = gimple_phi_num_args (phi);
624 gcc_assert (num_args > 2);
626 for (i = 0; i < num_args; i++)
628 arg = gimple_phi_arg_def (phi, i);
629 if (t1 == NULL || operand_equal_p (t1, arg, 0))
631 n1++;
632 i1 = i;
633 t1 = arg;
635 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
637 n2++;
638 i2 = i;
639 t2 = arg;
641 else
642 return false;
645 if (n1 != 1 && n2 != 1)
646 return false;
648 /* Check if the edge corresponding to the unique arg is critical. */
649 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
650 if (EDGE_COUNT (e->src->succs) > 1)
651 return false;
653 return true;
656 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
657 and it belongs to basic block BB. Note at this point, it is sure
658 that PHI is if-convertible. This function updates global variable
659 ANY_COMPLICATED_PHI if PHI is complicated. */
661 static bool
662 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
664 if (dump_file && (dump_flags & TDF_DETAILS))
666 fprintf (dump_file, "-------------------------\n");
667 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
670 if (bb != loop->header
671 && gimple_phi_num_args (phi) > 2
672 && !phi_convertible_by_degenerating_args (phi))
673 any_complicated_phi = true;
675 return true;
678 /* Records the status of a data reference. This struct is attached to
679 each DR->aux field. */
681 struct ifc_dr {
682 bool rw_unconditionally;
683 bool w_unconditionally;
684 bool written_at_least_once;
686 tree rw_predicate;
687 tree w_predicate;
688 tree base_w_predicate;
691 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
692 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
693 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
694 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
696 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
697 HASH tables. While storing them in HASH table, it checks if the
698 reference is unconditionally read or written and stores that as a flag
699 information. For base reference it checks if it is written atlest once
700 unconditionally and stores it as flag information along with DR.
701 In other words for every data reference A in STMT there exist other
702 accesses to a data reference with the same base with predicates that
703 add up (OR-up) to the true predicate: this ensures that the data
704 reference A is touched (read or written) on every iteration of the
705 if-converted loop. */
706 static void
707 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
710 data_reference_p *master_dr, *base_master_dr;
711 tree base_ref = DR_BASE_OBJECT (a);
712 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
713 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
714 bool exist1, exist2;
716 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
717 if (!exist1)
718 *master_dr = a;
720 if (DR_IS_WRITE (a))
722 IFC_DR (*master_dr)->w_predicate
723 = fold_or_predicates (UNKNOWN_LOCATION, ca,
724 IFC_DR (*master_dr)->w_predicate);
725 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
726 DR_W_UNCONDITIONALLY (*master_dr) = true;
728 IFC_DR (*master_dr)->rw_predicate
729 = fold_or_predicates (UNKNOWN_LOCATION, ca,
730 IFC_DR (*master_dr)->rw_predicate);
731 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
732 DR_RW_UNCONDITIONALLY (*master_dr) = true;
734 if (DR_IS_WRITE (a))
736 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
737 if (!exist2)
738 *base_master_dr = a;
739 IFC_DR (*base_master_dr)->base_w_predicate
740 = fold_or_predicates (UNKNOWN_LOCATION, ca,
741 IFC_DR (*base_master_dr)->base_w_predicate);
742 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
743 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
747 /* Return TRUE if can prove the index IDX of an array reference REF is
748 within array bound. Return false otherwise. */
750 static bool
751 idx_within_array_bound (tree ref, tree *idx, void *dta)
753 wi::overflow_type overflow;
754 widest_int niter, valid_niter, delta, wi_step;
755 tree ev, init, step;
756 tree low, high;
757 struct loop *loop = (struct loop*) dta;
759 /* Only support within-bound access for array references. */
760 if (TREE_CODE (ref) != ARRAY_REF)
761 return false;
763 /* For arrays at the end of the structure, we are not guaranteed that they
764 do not really extend over their declared size. However, for arrays of
765 size greater than one, this is unlikely to be intended. */
766 if (array_at_struct_end_p (ref))
767 return false;
769 ev = analyze_scalar_evolution (loop, *idx);
770 ev = instantiate_parameters (loop, ev);
771 init = initial_condition (ev);
772 step = evolution_part_in_loop_num (ev, loop->num);
774 if (!init || TREE_CODE (init) != INTEGER_CST
775 || (step && TREE_CODE (step) != INTEGER_CST))
776 return false;
778 low = array_ref_low_bound (ref);
779 high = array_ref_up_bound (ref);
781 /* The case of nonconstant bounds could be handled, but it would be
782 complicated. */
783 if (TREE_CODE (low) != INTEGER_CST
784 || !high || TREE_CODE (high) != INTEGER_CST)
785 return false;
787 /* Check if the intial idx is within bound. */
788 if (wi::to_widest (init) < wi::to_widest (low)
789 || wi::to_widest (init) > wi::to_widest (high))
790 return false;
792 /* The idx is always within bound. */
793 if (!step || integer_zerop (step))
794 return true;
796 if (!max_loop_iterations (loop, &niter))
797 return false;
799 if (wi::to_widest (step) < 0)
801 delta = wi::to_widest (init) - wi::to_widest (low);
802 wi_step = -wi::to_widest (step);
804 else
806 delta = wi::to_widest (high) - wi::to_widest (init);
807 wi_step = wi::to_widest (step);
810 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
811 /* The iteration space of idx is within array bound. */
812 if (!overflow && niter <= valid_niter)
813 return true;
815 return false;
818 /* Return TRUE if ref is a within bound array reference. */
820 static bool
821 ref_within_array_bound (gimple *stmt, tree ref)
823 struct loop *loop = loop_containing_stmt (stmt);
825 gcc_assert (loop != NULL);
826 return for_each_index (&ref, idx_within_array_bound, loop);
830 /* Given a memory reference expression T, return TRUE if base object
831 it refers to is writable. The base object of a memory reference
832 is the main object being referenced, which is returned by function
833 get_base_address. */
835 static bool
836 base_object_writable (tree ref)
838 tree base_tree = get_base_address (ref);
840 return (base_tree
841 && DECL_P (base_tree)
842 && decl_binds_to_current_def_p (base_tree)
843 && !TREE_READONLY (base_tree));
846 /* Return true when the memory references of STMT won't trap in the
847 if-converted code. There are two things that we have to check for:
849 - writes to memory occur to writable memory: if-conversion of
850 memory writes transforms the conditional memory writes into
851 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
852 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
853 be executed at all in the original code, it may be a readonly
854 memory. To check that A is not const-qualified, we check that
855 there exists at least an unconditional write to A in the current
856 function.
858 - reads or writes to memory are valid memory accesses for every
859 iteration. To check that the memory accesses are correctly formed
860 and that we are allowed to read and write in these locations, we
861 check that the memory accesses to be if-converted occur at every
862 iteration unconditionally.
864 Returns true for the memory reference in STMT, same memory reference
865 is read or written unconditionally atleast once and the base memory
866 reference is written unconditionally once. This is to check reference
867 will not write fault. Also retuns true if the memory reference is
868 unconditionally read once then we are conditionally writing to memory
869 which is defined as read and write and is bound to the definition
870 we are seeing. */
871 static bool
872 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
874 /* If DR didn't see a reference here we can't use it to tell
875 whether the ref traps or not. */
876 if (gimple_uid (stmt) == 0)
877 return false;
879 data_reference_p *master_dr, *base_master_dr;
880 data_reference_p a = drs[gimple_uid (stmt) - 1];
882 tree base = DR_BASE_OBJECT (a);
883 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
885 gcc_assert (DR_STMT (a) == stmt);
886 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
887 || DR_INIT (a) || DR_STEP (a));
889 master_dr = innermost_DR_map->get (innermost);
890 gcc_assert (master_dr != NULL);
892 base_master_dr = baseref_DR_map->get (base);
894 /* If a is unconditionally written to it doesn't trap. */
895 if (DR_W_UNCONDITIONALLY (*master_dr))
896 return true;
898 /* If a is unconditionally accessed then ...
900 Even a is conditional access, we can treat it as an unconditional
901 one if it's an array reference and all its index are within array
902 bound. */
903 if (DR_RW_UNCONDITIONALLY (*master_dr)
904 || ref_within_array_bound (stmt, DR_REF (a)))
906 /* an unconditional read won't trap. */
907 if (DR_IS_READ (a))
908 return true;
910 /* an unconditionaly write won't trap if the base is written
911 to unconditionally. */
912 if (base_master_dr
913 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
914 return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
915 /* or the base is known to be not readonly. */
916 else if (base_object_writable (DR_REF (a)))
917 return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
920 return false;
923 /* Return true if STMT could be converted into a masked load or store
924 (conditional load or store based on a mask computed from bb predicate). */
926 static bool
927 ifcvt_can_use_mask_load_store (gimple *stmt)
929 /* Check whether this is a load or store. */
930 tree lhs = gimple_assign_lhs (stmt);
931 bool is_load;
932 tree ref;
933 if (gimple_store_p (stmt))
935 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
936 return false;
937 is_load = false;
938 ref = lhs;
940 else if (gimple_assign_load_p (stmt))
942 is_load = true;
943 ref = gimple_assign_rhs1 (stmt);
945 else
946 return false;
948 if (may_be_nonaddressable_p (ref))
949 return false;
951 /* Mask should be integer mode of the same size as the load/store
952 mode. */
953 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
954 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
955 return false;
957 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
958 return true;
960 return false;
963 /* Return true if STMT could be converted from an operation that is
964 unconditional to one that is conditional on a bb predicate mask. */
966 static bool
967 ifcvt_can_predicate (gimple *stmt)
969 basic_block bb = gimple_bb (stmt);
971 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
972 || bb->loop_father->dont_vectorize
973 || gimple_has_volatile_ops (stmt))
974 return false;
976 if (gimple_assign_single_p (stmt))
977 return ifcvt_can_use_mask_load_store (stmt);
979 tree_code code = gimple_assign_rhs_code (stmt);
980 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
981 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
982 if (!types_compatible_p (lhs_type, rhs_type))
983 return false;
984 internal_fn cond_fn = get_conditional_internal_fn (code);
985 return (cond_fn != IFN_LAST
986 && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
989 /* Return true when STMT is if-convertible.
991 GIMPLE_ASSIGN statement is not if-convertible if,
992 - it is not movable,
993 - it could trap,
994 - LHS is not var decl. */
996 static bool
997 if_convertible_gimple_assign_stmt_p (gimple *stmt,
998 vec<data_reference_p> refs)
1000 tree lhs = gimple_assign_lhs (stmt);
1002 if (dump_file && (dump_flags & TDF_DETAILS))
1004 fprintf (dump_file, "-------------------------\n");
1005 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1008 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1009 return false;
1011 /* Some of these constrains might be too conservative. */
1012 if (stmt_ends_bb_p (stmt)
1013 || gimple_has_volatile_ops (stmt)
1014 || (TREE_CODE (lhs) == SSA_NAME
1015 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1016 || gimple_has_side_effects (stmt))
1018 if (dump_file && (dump_flags & TDF_DETAILS))
1019 fprintf (dump_file, "stmt not suitable for ifcvt\n");
1020 return false;
1023 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
1024 in between if_convertible_loop_p and combine_blocks
1025 we can perform loop versioning. */
1026 gimple_set_plf (stmt, GF_PLF_2, false);
1028 if ((! gimple_vuse (stmt)
1029 || gimple_could_trap_p_1 (stmt, false, false)
1030 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1031 && gimple_could_trap_p (stmt))
1033 if (ifcvt_can_predicate (stmt))
1035 gimple_set_plf (stmt, GF_PLF_2, true);
1036 need_to_predicate = true;
1037 return true;
1039 if (dump_file && (dump_flags & TDF_DETAILS))
1040 fprintf (dump_file, "tree could trap...\n");
1041 return false;
1044 /* When if-converting stores force versioning, likewise if we
1045 ended up generating store data races. */
1046 if (gimple_vdef (stmt))
1047 need_to_predicate = true;
1049 return true;
1052 /* Return true when STMT is if-convertible.
1054 A statement is if-convertible if:
1055 - it is an if-convertible GIMPLE_ASSIGN,
1056 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1057 - it is builtins call. */
1059 static bool
1060 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1062 switch (gimple_code (stmt))
1064 case GIMPLE_LABEL:
1065 case GIMPLE_DEBUG:
1066 case GIMPLE_COND:
1067 return true;
1069 case GIMPLE_ASSIGN:
1070 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1072 case GIMPLE_CALL:
1074 tree fndecl = gimple_call_fndecl (stmt);
1075 if (fndecl)
1077 int flags = gimple_call_flags (stmt);
1078 if ((flags & ECF_CONST)
1079 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1080 /* We can only vectorize some builtins at the moment,
1081 so restrict if-conversion to those. */
1082 && fndecl_built_in_p (fndecl))
1083 return true;
1085 return false;
1088 default:
1089 /* Don't know what to do with 'em so don't do anything. */
1090 if (dump_file && (dump_flags & TDF_DETAILS))
1092 fprintf (dump_file, "don't know what to do\n");
1093 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1095 return false;
1098 return true;
1101 /* Assumes that BB has more than 1 predecessors.
1102 Returns false if at least one successor is not on critical edge
1103 and true otherwise. */
1105 static inline bool
1106 all_preds_critical_p (basic_block bb)
1108 edge e;
1109 edge_iterator ei;
1111 FOR_EACH_EDGE (e, ei, bb->preds)
1112 if (EDGE_COUNT (e->src->succs) == 1)
1113 return false;
1114 return true;
1117 /* Return true when BB is if-convertible. This routine does not check
1118 basic block's statements and phis.
1120 A basic block is not if-convertible if:
1121 - it is non-empty and it is after the exit block (in BFS order),
1122 - it is after the exit block but before the latch,
1123 - its edges are not normal.
1125 EXIT_BB is the basic block containing the exit of the LOOP. BB is
1126 inside LOOP. */
1128 static bool
1129 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
1131 edge e;
1132 edge_iterator ei;
1134 if (dump_file && (dump_flags & TDF_DETAILS))
1135 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1137 if (EDGE_COUNT (bb->succs) > 2)
1138 return false;
1140 if (exit_bb)
1142 if (bb != loop->latch)
1144 if (dump_file && (dump_flags & TDF_DETAILS))
1145 fprintf (dump_file, "basic block after exit bb but before latch\n");
1146 return false;
1148 else if (!empty_block_p (bb))
1150 if (dump_file && (dump_flags & TDF_DETAILS))
1151 fprintf (dump_file, "non empty basic block after exit bb\n");
1152 return false;
1154 else if (bb == loop->latch
1155 && bb != exit_bb
1156 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1158 if (dump_file && (dump_flags & TDF_DETAILS))
1159 fprintf (dump_file, "latch is not dominated by exit_block\n");
1160 return false;
1164 /* Be less adventurous and handle only normal edges. */
1165 FOR_EACH_EDGE (e, ei, bb->succs)
1166 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1168 if (dump_file && (dump_flags & TDF_DETAILS))
1169 fprintf (dump_file, "Difficult to handle edges\n");
1170 return false;
1173 return true;
1176 /* Return true when all predecessor blocks of BB are visited. The
1177 VISITED bitmap keeps track of the visited blocks. */
1179 static bool
1180 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1182 edge e;
1183 edge_iterator ei;
1184 FOR_EACH_EDGE (e, ei, bb->preds)
1185 if (!bitmap_bit_p (*visited, e->src->index))
1186 return false;
1188 return true;
1191 /* Get body of a LOOP in suitable order for if-conversion. It is
1192 caller's responsibility to deallocate basic block list.
1193 If-conversion suitable order is, breadth first sort (BFS) order
1194 with an additional constraint: select a block only if all its
1195 predecessors are already selected. */
1197 static basic_block *
1198 get_loop_body_in_if_conv_order (const struct loop *loop)
1200 basic_block *blocks, *blocks_in_bfs_order;
1201 basic_block bb;
1202 bitmap visited;
1203 unsigned int index = 0;
1204 unsigned int visited_count = 0;
1206 gcc_assert (loop->num_nodes);
1207 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1209 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1210 visited = BITMAP_ALLOC (NULL);
1212 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1214 index = 0;
1215 while (index < loop->num_nodes)
1217 bb = blocks_in_bfs_order [index];
1219 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1221 free (blocks_in_bfs_order);
1222 BITMAP_FREE (visited);
1223 free (blocks);
1224 return NULL;
1227 if (!bitmap_bit_p (visited, bb->index))
1229 if (pred_blocks_visited_p (bb, &visited)
1230 || bb == loop->header)
1232 /* This block is now visited. */
1233 bitmap_set_bit (visited, bb->index);
1234 blocks[visited_count++] = bb;
1238 index++;
1240 if (index == loop->num_nodes
1241 && visited_count != loop->num_nodes)
1242 /* Not done yet. */
1243 index = 0;
1245 free (blocks_in_bfs_order);
1246 BITMAP_FREE (visited);
1247 return blocks;
1250 /* Returns true when the analysis of the predicates for all the basic
1251 blocks in LOOP succeeded.
1253 predicate_bbs first allocates the predicates of the basic blocks.
1254 These fields are then initialized with the tree expressions
1255 representing the predicates under which a basic block is executed
1256 in the LOOP. As the loop->header is executed at each iteration, it
1257 has the "true" predicate. Other statements executed under a
1258 condition are predicated with that condition, for example
1260 | if (x)
1261 | S1;
1262 | else
1263 | S2;
1265 S1 will be predicated with "x", and
1266 S2 will be predicated with "!x". */
1268 static void
1269 predicate_bbs (loop_p loop)
1271 unsigned int i;
1273 for (i = 0; i < loop->num_nodes; i++)
1274 init_bb_predicate (ifc_bbs[i]);
1276 for (i = 0; i < loop->num_nodes; i++)
1278 basic_block bb = ifc_bbs[i];
1279 tree cond;
1280 gimple *stmt;
1282 /* The loop latch and loop exit block are always executed and
1283 have no extra conditions to be processed: skip them. */
1284 if (bb == loop->latch
1285 || bb_with_exit_edge_p (loop, bb))
1287 reset_bb_predicate (bb);
1288 continue;
1291 cond = bb_predicate (bb);
1292 stmt = last_stmt (bb);
1293 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1295 tree c2;
1296 edge true_edge, false_edge;
1297 location_t loc = gimple_location (stmt);
1298 tree c = build2_loc (loc, gimple_cond_code (stmt),
1299 boolean_type_node,
1300 gimple_cond_lhs (stmt),
1301 gimple_cond_rhs (stmt));
1303 /* Add new condition into destination's predicate list. */
1304 extract_true_false_edges_from_block (gimple_bb (stmt),
1305 &true_edge, &false_edge);
1307 /* If C is true, then TRUE_EDGE is taken. */
1308 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1309 unshare_expr (c));
1311 /* If C is false, then FALSE_EDGE is taken. */
1312 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1313 unshare_expr (c));
1314 add_to_dst_predicate_list (loop, false_edge,
1315 unshare_expr (cond), c2);
1317 cond = NULL_TREE;
1320 /* If current bb has only one successor, then consider it as an
1321 unconditional goto. */
1322 if (single_succ_p (bb))
1324 basic_block bb_n = single_succ (bb);
1326 /* The successor bb inherits the predicate of its
1327 predecessor. If there is no predicate in the predecessor
1328 bb, then consider the successor bb as always executed. */
1329 if (cond == NULL_TREE)
1330 cond = boolean_true_node;
1332 add_to_predicate_list (loop, bb_n, cond);
1336 /* The loop header is always executed. */
1337 reset_bb_predicate (loop->header);
1338 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1339 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1342 /* Build region by adding loop pre-header and post-header blocks. */
1344 static vec<basic_block>
1345 build_region (struct loop *loop)
1347 vec<basic_block> region = vNULL;
1348 basic_block exit_bb = NULL;
1350 gcc_assert (ifc_bbs);
1351 /* The first element is loop pre-header. */
1352 region.safe_push (loop_preheader_edge (loop)->src);
1354 for (unsigned int i = 0; i < loop->num_nodes; i++)
1356 basic_block bb = ifc_bbs[i];
1357 region.safe_push (bb);
1358 /* Find loop postheader. */
1359 edge e;
1360 edge_iterator ei;
1361 FOR_EACH_EDGE (e, ei, bb->succs)
1362 if (loop_exit_edge_p (loop, e))
1364 exit_bb = e->dest;
1365 break;
1368 /* The last element is loop post-header. */
1369 gcc_assert (exit_bb);
1370 region.safe_push (exit_bb);
1371 return region;
1374 /* Return true when LOOP is if-convertible. This is a helper function
1375 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1376 in if_convertible_loop_p. */
1378 static bool
1379 if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
1381 unsigned int i;
1382 basic_block exit_bb = NULL;
1383 vec<basic_block> region;
1385 if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1386 return false;
1388 calculate_dominance_info (CDI_DOMINATORS);
1390 /* Allow statements that can be handled during if-conversion. */
1391 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1392 if (!ifc_bbs)
1394 if (dump_file && (dump_flags & TDF_DETAILS))
1395 fprintf (dump_file, "Irreducible loop\n");
1396 return false;
1399 for (i = 0; i < loop->num_nodes; i++)
1401 basic_block bb = ifc_bbs[i];
1403 if (!if_convertible_bb_p (loop, bb, exit_bb))
1404 return false;
1406 if (bb_with_exit_edge_p (loop, bb))
1407 exit_bb = bb;
1410 for (i = 0; i < loop->num_nodes; i++)
1412 basic_block bb = ifc_bbs[i];
1413 gimple_stmt_iterator gsi;
1415 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1416 switch (gimple_code (gsi_stmt (gsi)))
1418 case GIMPLE_LABEL:
1419 case GIMPLE_ASSIGN:
1420 case GIMPLE_CALL:
1421 case GIMPLE_DEBUG:
1422 case GIMPLE_COND:
1423 gimple_set_uid (gsi_stmt (gsi), 0);
1424 break;
1425 default:
1426 return false;
1430 data_reference_p dr;
1432 innermost_DR_map
1433 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1434 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1436 /* Compute post-dominator tree locally. */
1437 region = build_region (loop);
1438 calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1440 predicate_bbs (loop);
1442 /* Free post-dominator tree since it is not used after predication. */
1443 free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1444 region.release ();
1446 for (i = 0; refs->iterate (i, &dr); i++)
1448 tree ref = DR_REF (dr);
1450 dr->aux = XNEW (struct ifc_dr);
1451 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1452 DR_RW_UNCONDITIONALLY (dr) = false;
1453 DR_W_UNCONDITIONALLY (dr) = false;
1454 IFC_DR (dr)->rw_predicate = boolean_false_node;
1455 IFC_DR (dr)->w_predicate = boolean_false_node;
1456 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1457 if (gimple_uid (DR_STMT (dr)) == 0)
1458 gimple_set_uid (DR_STMT (dr), i + 1);
1460 /* If DR doesn't have innermost loop behavior or it's a compound
1461 memory reference, we synthesize its innermost loop behavior
1462 for hashing. */
1463 if (TREE_CODE (ref) == COMPONENT_REF
1464 || TREE_CODE (ref) == IMAGPART_EXPR
1465 || TREE_CODE (ref) == REALPART_EXPR
1466 || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1467 || DR_INIT (dr) || DR_STEP (dr)))
1469 while (TREE_CODE (ref) == COMPONENT_REF
1470 || TREE_CODE (ref) == IMAGPART_EXPR
1471 || TREE_CODE (ref) == REALPART_EXPR)
1472 ref = TREE_OPERAND (ref, 0);
1474 memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1475 DR_BASE_ADDRESS (dr) = ref;
1477 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1480 for (i = 0; i < loop->num_nodes; i++)
1482 basic_block bb = ifc_bbs[i];
1483 gimple_stmt_iterator itr;
1485 /* Check the if-convertibility of statements in predicated BBs. */
1486 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1487 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1488 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1489 return false;
1492 /* Checking PHIs needs to be done after stmts, as the fact whether there
1493 are any masked loads or stores affects the tests. */
1494 for (i = 0; i < loop->num_nodes; i++)
1496 basic_block bb = ifc_bbs[i];
1497 gphi_iterator itr;
1499 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1500 if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1501 return false;
1504 if (dump_file)
1505 fprintf (dump_file, "Applying if-conversion\n");
1507 return true;
1510 /* Return true when LOOP is if-convertible.
1511 LOOP is if-convertible if:
1512 - it is innermost,
1513 - it has two or more basic blocks,
1514 - it has only one exit,
1515 - loop header is not the exit edge,
1516 - if its basic blocks and phi nodes are if convertible. */
1518 static bool
1519 if_convertible_loop_p (struct loop *loop)
1521 edge e;
1522 edge_iterator ei;
1523 bool res = false;
1524 vec<data_reference_p> refs;
1526 /* Handle only innermost loop. */
1527 if (!loop || loop->inner)
1529 if (dump_file && (dump_flags & TDF_DETAILS))
1530 fprintf (dump_file, "not innermost loop\n");
1531 return false;
1534 /* If only one block, no need for if-conversion. */
1535 if (loop->num_nodes <= 2)
1537 if (dump_file && (dump_flags & TDF_DETAILS))
1538 fprintf (dump_file, "less than 2 basic blocks\n");
1539 return false;
1542 /* More than one loop exit is too much to handle. */
1543 if (!single_exit (loop))
1545 if (dump_file && (dump_flags & TDF_DETAILS))
1546 fprintf (dump_file, "multiple exits\n");
1547 return false;
1550 /* If one of the loop header's edge is an exit edge then do not
1551 apply if-conversion. */
1552 FOR_EACH_EDGE (e, ei, loop->header->succs)
1553 if (loop_exit_edge_p (loop, e))
1554 return false;
1556 refs.create (5);
1557 res = if_convertible_loop_p_1 (loop, &refs);
1559 data_reference_p dr;
1560 unsigned int i;
1561 for (i = 0; refs.iterate (i, &dr); i++)
1562 free (dr->aux);
1564 free_data_refs (refs);
1566 delete innermost_DR_map;
1567 innermost_DR_map = NULL;
1569 delete baseref_DR_map;
1570 baseref_DR_map = NULL;
1572 return res;
1575 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1576 which is in predicated basic block.
1577 In fact, the following PHI pattern is searching:
1578 loop-header:
1579 reduc_1 = PHI <..., reduc_2>
1581 if (...)
1582 reduc_3 = ...
1583 reduc_2 = PHI <reduc_1, reduc_3>
1585 ARG_0 and ARG_1 are correspondent PHI arguments.
1586 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1587 EXTENDED is true if PHI has > 2 arguments. */
1589 static bool
1590 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1591 tree *op0, tree *op1, bool extended)
1593 tree lhs, r_op1, r_op2;
1594 gimple *stmt;
1595 gimple *header_phi = NULL;
1596 enum tree_code reduction_op;
1597 basic_block bb = gimple_bb (phi);
1598 struct loop *loop = bb->loop_father;
1599 edge latch_e = loop_latch_edge (loop);
1600 imm_use_iterator imm_iter;
1601 use_operand_p use_p;
1602 edge e;
1603 edge_iterator ei;
1604 bool result = false;
1605 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1606 return false;
1608 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1610 lhs = arg_1;
1611 header_phi = SSA_NAME_DEF_STMT (arg_0);
1612 stmt = SSA_NAME_DEF_STMT (arg_1);
1614 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1616 lhs = arg_0;
1617 header_phi = SSA_NAME_DEF_STMT (arg_1);
1618 stmt = SSA_NAME_DEF_STMT (arg_0);
1620 else
1621 return false;
1622 if (gimple_bb (header_phi) != loop->header)
1623 return false;
1625 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1626 return false;
1628 if (gimple_code (stmt) != GIMPLE_ASSIGN
1629 || gimple_has_volatile_ops (stmt))
1630 return false;
1632 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1633 return false;
1635 if (!is_predicated (gimple_bb (stmt)))
1636 return false;
1638 /* Check that stmt-block is predecessor of phi-block. */
1639 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1640 if (e->dest == bb)
1642 result = true;
1643 break;
1645 if (!result)
1646 return false;
1648 if (!has_single_use (lhs))
1649 return false;
1651 reduction_op = gimple_assign_rhs_code (stmt);
1652 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1653 return false;
1654 r_op1 = gimple_assign_rhs1 (stmt);
1655 r_op2 = gimple_assign_rhs2 (stmt);
1657 /* Make R_OP1 to hold reduction variable. */
1658 if (r_op2 == PHI_RESULT (header_phi)
1659 && reduction_op == PLUS_EXPR)
1660 std::swap (r_op1, r_op2);
1661 else if (r_op1 != PHI_RESULT (header_phi))
1662 return false;
1664 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1665 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1667 gimple *use_stmt = USE_STMT (use_p);
1668 if (is_gimple_debug (use_stmt))
1669 continue;
1670 if (use_stmt == stmt)
1671 continue;
1672 if (gimple_code (use_stmt) != GIMPLE_PHI)
1673 return false;
1676 *op0 = r_op1; *op1 = r_op2;
1677 *reduc = stmt;
1678 return true;
1681 /* Converts conditional scalar reduction into unconditional form, e.g.
1682 bb_4
1683 if (_5 != 0) goto bb_5 else goto bb_6
1684 end_bb_4
1685 bb_5
1686 res_6 = res_13 + 1;
1687 end_bb_5
1688 bb_6
1689 # res_2 = PHI <res_13(4), res_6(5)>
1690 end_bb_6
1692 will be converted into sequence
1693 _ifc__1 = _5 != 0 ? 1 : 0;
1694 res_2 = res_13 + _ifc__1;
1695 Argument SWAP tells that arguments of conditional expression should be
1696 swapped.
1697 Returns rhs of resulting PHI assignment. */
1699 static tree
1700 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1701 tree cond, tree op0, tree op1, bool swap)
1703 gimple_stmt_iterator stmt_it;
1704 gimple *new_assign;
1705 tree rhs;
1706 tree rhs1 = gimple_assign_rhs1 (reduc);
1707 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1708 tree c;
1709 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1711 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Found cond scalar reduction.\n");
1714 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1717 /* Build cond expression using COND and constant operand
1718 of reduction rhs. */
1719 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1720 unshare_expr (cond),
1721 swap ? zero : op1,
1722 swap ? op1 : zero);
1724 /* Create assignment stmt and insert it at GSI. */
1725 new_assign = gimple_build_assign (tmp, c);
1726 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1727 /* Build rhs for unconditional increment/decrement. */
1728 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1729 TREE_TYPE (rhs1), op0, tmp);
1731 /* Delete original reduction stmt. */
1732 stmt_it = gsi_for_stmt (reduc);
1733 gsi_remove (&stmt_it, true);
1734 release_defs (reduc);
1735 return rhs;
1738 /* Produce condition for all occurrences of ARG in PHI node. */
1740 static tree
1741 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1742 gimple_stmt_iterator *gsi)
1744 int len;
1745 int i;
1746 tree cond = NULL_TREE;
1747 tree c;
1748 edge e;
1750 len = occur->length ();
1751 gcc_assert (len > 0);
1752 for (i = 0; i < len; i++)
1754 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1755 c = bb_predicate (e->src);
1756 if (is_true_predicate (c))
1758 cond = c;
1759 break;
1761 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1762 is_gimple_condexpr, NULL_TREE,
1763 true, GSI_SAME_STMT);
1764 if (cond != NULL_TREE)
1766 /* Must build OR expression. */
1767 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1768 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1769 is_gimple_condexpr, NULL_TREE,
1770 true, GSI_SAME_STMT);
1772 else
1773 cond = c;
1775 gcc_assert (cond != NULL_TREE);
1776 return cond;
1779 /* Local valueization callback that follows all-use SSA edges. */
1781 static tree
1782 ifcvt_follow_ssa_use_edges (tree val)
1784 return val;
1787 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1788 This routine can handle PHI nodes with more than two arguments.
1790 For example,
1791 S1: A = PHI <x1(1), x2(5)>
1792 is converted into,
1793 S2: A = cond ? x1 : x2;
1795 The generated code is inserted at GSI that points to the top of
1796 basic block's statement list.
1797 If PHI node has more than two arguments a chain of conditional
1798 expression is produced. */
1801 static void
1802 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1804 gimple *new_stmt = NULL, *reduc;
1805 tree rhs, res, arg0, arg1, op0, op1, scev;
1806 tree cond;
1807 unsigned int index0;
1808 unsigned int max, args_len;
1809 edge e;
1810 basic_block bb;
1811 unsigned int i;
1813 res = gimple_phi_result (phi);
1814 if (virtual_operand_p (res))
1815 return;
1817 if ((rhs = degenerate_phi_result (phi))
1818 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1819 res))
1820 && !chrec_contains_undetermined (scev)
1821 && scev != res
1822 && (rhs = gimple_phi_arg_def (phi, 0))))
1824 if (dump_file && (dump_flags & TDF_DETAILS))
1826 fprintf (dump_file, "Degenerate phi!\n");
1827 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1829 new_stmt = gimple_build_assign (res, rhs);
1830 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1831 update_stmt (new_stmt);
1832 return;
1835 bb = gimple_bb (phi);
1836 if (EDGE_COUNT (bb->preds) == 2)
1838 /* Predicate ordinary PHI node with 2 arguments. */
1839 edge first_edge, second_edge;
1840 basic_block true_bb;
1841 first_edge = EDGE_PRED (bb, 0);
1842 second_edge = EDGE_PRED (bb, 1);
1843 cond = bb_predicate (first_edge->src);
1844 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1845 std::swap (first_edge, second_edge);
1846 if (EDGE_COUNT (first_edge->src->succs) > 1)
1848 cond = bb_predicate (second_edge->src);
1849 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1850 cond = TREE_OPERAND (cond, 0);
1851 else
1852 first_edge = second_edge;
1854 else
1855 cond = bb_predicate (first_edge->src);
1856 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1857 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1858 is_gimple_condexpr, NULL_TREE,
1859 true, GSI_SAME_STMT);
1860 true_bb = first_edge->src;
1861 if (EDGE_PRED (bb, 1)->src == true_bb)
1863 arg0 = gimple_phi_arg_def (phi, 1);
1864 arg1 = gimple_phi_arg_def (phi, 0);
1866 else
1868 arg0 = gimple_phi_arg_def (phi, 0);
1869 arg1 = gimple_phi_arg_def (phi, 1);
1871 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1872 &op0, &op1, false))
1873 /* Convert reduction stmt into vectorizable form. */
1874 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1875 true_bb != gimple_bb (reduc));
1876 else
1877 /* Build new RHS using selected condition and arguments. */
1878 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1879 arg0, arg1);
1880 new_stmt = gimple_build_assign (res, rhs);
1881 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1882 gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
1883 if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
1885 new_stmt = gsi_stmt (new_gsi);
1886 update_stmt (new_stmt);
1889 if (dump_file && (dump_flags & TDF_DETAILS))
1891 fprintf (dump_file, "new phi replacement stmt\n");
1892 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1894 return;
1897 /* Create hashmap for PHI node which contain vector of argument indexes
1898 having the same value. */
1899 bool swap = false;
1900 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1901 unsigned int num_args = gimple_phi_num_args (phi);
1902 int max_ind = -1;
1903 /* Vector of different PHI argument values. */
1904 auto_vec<tree> args (num_args);
1906 /* Compute phi_arg_map. */
1907 for (i = 0; i < num_args; i++)
1909 tree arg;
1911 arg = gimple_phi_arg_def (phi, i);
1912 if (!phi_arg_map.get (arg))
1913 args.quick_push (arg);
1914 phi_arg_map.get_or_insert (arg).safe_push (i);
1917 /* Determine element with max number of occurrences. */
1918 max_ind = -1;
1919 max = 1;
1920 args_len = args.length ();
1921 for (i = 0; i < args_len; i++)
1923 unsigned int len;
1924 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1926 max_ind = (int) i;
1927 max = len;
1931 /* Put element with max number of occurences to the end of ARGS. */
1932 if (max_ind != -1 && max_ind +1 != (int) args_len)
1933 std::swap (args[args_len - 1], args[max_ind]);
1935 /* Handle one special case when number of arguments with different values
1936 is equal 2 and one argument has the only occurrence. Such PHI can be
1937 handled as if would have only 2 arguments. */
1938 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1940 vec<int> *indexes;
1941 indexes = phi_arg_map.get (args[0]);
1942 index0 = (*indexes)[0];
1943 arg0 = args[0];
1944 arg1 = args[1];
1945 e = gimple_phi_arg_edge (phi, index0);
1946 cond = bb_predicate (e->src);
1947 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1949 swap = true;
1950 cond = TREE_OPERAND (cond, 0);
1952 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1953 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1954 is_gimple_condexpr, NULL_TREE,
1955 true, GSI_SAME_STMT);
1956 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1957 &op0, &op1, true)))
1958 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1959 swap? arg1 : arg0,
1960 swap? arg0 : arg1);
1961 else
1962 /* Convert reduction stmt into vectorizable form. */
1963 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1964 swap);
1965 new_stmt = gimple_build_assign (res, rhs);
1966 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1967 update_stmt (new_stmt);
1969 else
1971 /* Common case. */
1972 vec<int> *indexes;
1973 tree type = TREE_TYPE (gimple_phi_result (phi));
1974 tree lhs;
1975 arg1 = args[1];
1976 for (i = 0; i < args_len; i++)
1978 arg0 = args[i];
1979 indexes = phi_arg_map.get (args[i]);
1980 if (i != args_len - 1)
1981 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1982 else
1983 lhs = res;
1984 cond = gen_phi_arg_condition (phi, indexes, gsi);
1985 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1986 arg0, arg1);
1987 new_stmt = gimple_build_assign (lhs, rhs);
1988 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1989 update_stmt (new_stmt);
1990 arg1 = lhs;
1994 if (dump_file && (dump_flags & TDF_DETAILS))
1996 fprintf (dump_file, "new extended phi replacement stmt\n");
1997 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2001 /* Replaces in LOOP all the scalar phi nodes other than those in the
2002 LOOP->header block with conditional modify expressions. */
2004 static void
2005 predicate_all_scalar_phis (struct loop *loop)
2007 basic_block bb;
2008 unsigned int orig_loop_num_nodes = loop->num_nodes;
2009 unsigned int i;
2011 for (i = 1; i < orig_loop_num_nodes; i++)
2013 gphi *phi;
2014 gimple_stmt_iterator gsi;
2015 gphi_iterator phi_gsi;
2016 bb = ifc_bbs[i];
2018 if (bb == loop->header)
2019 continue;
2021 phi_gsi = gsi_start_phis (bb);
2022 if (gsi_end_p (phi_gsi))
2023 continue;
2025 gsi = gsi_after_labels (bb);
2026 while (!gsi_end_p (phi_gsi))
2028 phi = phi_gsi.phi ();
2029 if (virtual_operand_p (gimple_phi_result (phi)))
2030 gsi_next (&phi_gsi);
2031 else
2033 predicate_scalar_phi (phi, &gsi);
2034 remove_phi_node (&phi_gsi, false);
2040 /* Insert in each basic block of LOOP the statements produced by the
2041 gimplification of the predicates. */
2043 static void
2044 insert_gimplified_predicates (loop_p loop)
2046 unsigned int i;
2048 for (i = 0; i < loop->num_nodes; i++)
2050 basic_block bb = ifc_bbs[i];
2051 gimple_seq stmts;
2052 if (!is_predicated (bb))
2053 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2054 if (!is_predicated (bb))
2056 /* Do not insert statements for a basic block that is not
2057 predicated. Also make sure that the predicate of the
2058 basic block is set to true. */
2059 reset_bb_predicate (bb);
2060 continue;
2063 stmts = bb_predicate_gimplified_stmts (bb);
2064 if (stmts)
2066 if (need_to_predicate)
2068 /* Insert the predicate of the BB just after the label,
2069 as the if-conversion of memory writes will use this
2070 predicate. */
2071 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2072 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2074 else
2076 /* Insert the predicate of the BB at the end of the BB
2077 as this would reduce the register pressure: the only
2078 use of this predicate will be in successor BBs. */
2079 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2081 if (gsi_end_p (gsi)
2082 || stmt_ends_bb_p (gsi_stmt (gsi)))
2083 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2084 else
2085 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2088 /* Once the sequence is code generated, set it to NULL. */
2089 set_bb_predicate_gimplified_stmts (bb, NULL);
2094 /* Helper function for predicate_statements. Returns index of existent
2095 mask if it was created for given SIZE and -1 otherwise. */
2097 static int
2098 mask_exists (int size, vec<int> vec)
2100 unsigned int ix;
2101 int v;
2102 FOR_EACH_VEC_ELT (vec, ix, v)
2103 if (v == size)
2104 return (int) ix;
2105 return -1;
2108 /* Helper function for predicate_statements. STMT is a memory read or
2109 write and it needs to be predicated by MASK. Return a statement
2110 that does so. */
2112 static gimple *
2113 predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2115 gcall *new_stmt;
2117 tree lhs = gimple_assign_lhs (stmt);
2118 tree rhs = gimple_assign_rhs1 (stmt);
2119 tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2120 mark_addressable (ref);
2121 tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2122 true, NULL_TREE, true, GSI_SAME_STMT);
2123 tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2124 get_object_alignment (ref));
2125 /* Copy points-to info if possible. */
2126 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2127 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2128 ref);
2129 if (TREE_CODE (lhs) == SSA_NAME)
2131 new_stmt
2132 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2133 ptr, mask);
2134 gimple_call_set_lhs (new_stmt, lhs);
2135 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2137 else
2139 new_stmt
2140 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2141 mask, rhs);
2142 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2143 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
2144 SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
2146 gimple_call_set_nothrow (new_stmt, true);
2147 return new_stmt;
2150 /* STMT uses OP_LHS. Check whether it is equivalent to:
2152 ... = OP_MASK ? OP_LHS : X;
2154 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2155 known to have value OP_COND. */
2157 static tree
2158 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2159 tree op_lhs)
2161 gassign *assign = dyn_cast <gassign *> (stmt);
2162 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2163 return NULL_TREE;
2165 tree use_cond = gimple_assign_rhs1 (assign);
2166 tree if_true = gimple_assign_rhs2 (assign);
2167 tree if_false = gimple_assign_rhs3 (assign);
2169 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2170 && if_true == op_lhs)
2171 return if_false;
2173 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2174 return if_true;
2176 return NULL_TREE;
2179 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2180 the set of SSA names defined earlier in STMT's block. */
2182 static bool
2183 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2184 tree value)
2186 if (is_gimple_min_invariant (value))
2187 return true;
2189 if (TREE_CODE (value) == SSA_NAME)
2191 if (SSA_NAME_IS_DEFAULT_DEF (value))
2192 return true;
2194 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2195 basic_block use_bb = gimple_bb (stmt);
2196 return (def_bb == use_bb
2197 ? ssa_names->contains (value)
2198 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2201 return false;
2204 /* Helper function for predicate_statements. STMT is a potentially-trapping
2205 arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2206 has value COND. Return a statement that does so. SSA_NAMES is the set of
2207 SSA names defined earlier in STMT's block. */
2209 static gimple *
2210 predicate_rhs_code (gassign *stmt, tree mask, tree cond,
2211 hash_set<tree_ssa_name_hash> *ssa_names)
2213 tree lhs = gimple_assign_lhs (stmt);
2214 tree_code code = gimple_assign_rhs_code (stmt);
2215 unsigned int nops = gimple_num_ops (stmt);
2216 internal_fn cond_fn = get_conditional_internal_fn (code);
2218 /* Construct the arguments to the conditional internal function. */
2219 auto_vec<tree, 8> args;
2220 args.safe_grow (nops + 1);
2221 args[0] = mask;
2222 for (unsigned int i = 1; i < nops; ++i)
2223 args[i] = gimple_op (stmt, i);
2224 args[nops] = NULL_TREE;
2226 /* Look for uses of the result to see whether they are COND_EXPRs that can
2227 be folded into the conditional call. */
2228 imm_use_iterator imm_iter;
2229 gimple *use_stmt;
2230 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2232 tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2233 if (new_else && value_available_p (stmt, ssa_names, new_else))
2235 if (!args[nops])
2236 args[nops] = new_else;
2237 if (operand_equal_p (new_else, args[nops], 0))
2239 /* We have:
2241 LHS = IFN_COND (MASK, ..., ELSE);
2242 X = MASK ? LHS : ELSE;
2244 which makes X equivalent to LHS. */
2245 tree use_lhs = gimple_assign_lhs (use_stmt);
2246 redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2250 if (!args[nops])
2251 args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2252 nops - 1, &args[1]);
2254 /* Create and insert the call. */
2255 gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2256 gimple_call_set_lhs (new_stmt, lhs);
2257 gimple_call_set_nothrow (new_stmt, true);
2259 return new_stmt;
2262 /* Predicate each write to memory in LOOP.
2264 This function transforms control flow constructs containing memory
2265 writes of the form:
2267 | for (i = 0; i < N; i++)
2268 | if (cond)
2269 | A[i] = expr;
2271 into the following form that does not contain control flow:
2273 | for (i = 0; i < N; i++)
2274 | A[i] = cond ? expr : A[i];
2276 The original CFG looks like this:
2278 | bb_0
2279 | i = 0
2280 | end_bb_0
2282 | bb_1
2283 | if (i < N) goto bb_5 else goto bb_2
2284 | end_bb_1
2286 | bb_2
2287 | cond = some_computation;
2288 | if (cond) goto bb_3 else goto bb_4
2289 | end_bb_2
2291 | bb_3
2292 | A[i] = expr;
2293 | goto bb_4
2294 | end_bb_3
2296 | bb_4
2297 | goto bb_1
2298 | end_bb_4
2300 insert_gimplified_predicates inserts the computation of the COND
2301 expression at the beginning of the destination basic block:
2303 | bb_0
2304 | i = 0
2305 | end_bb_0
2307 | bb_1
2308 | if (i < N) goto bb_5 else goto bb_2
2309 | end_bb_1
2311 | bb_2
2312 | cond = some_computation;
2313 | if (cond) goto bb_3 else goto bb_4
2314 | end_bb_2
2316 | bb_3
2317 | cond = some_computation;
2318 | A[i] = expr;
2319 | goto bb_4
2320 | end_bb_3
2322 | bb_4
2323 | goto bb_1
2324 | end_bb_4
2326 predicate_statements is then predicating the memory write as follows:
2328 | bb_0
2329 | i = 0
2330 | end_bb_0
2332 | bb_1
2333 | if (i < N) goto bb_5 else goto bb_2
2334 | end_bb_1
2336 | bb_2
2337 | if (cond) goto bb_3 else goto bb_4
2338 | end_bb_2
2340 | bb_3
2341 | cond = some_computation;
2342 | A[i] = cond ? expr : A[i];
2343 | goto bb_4
2344 | end_bb_3
2346 | bb_4
2347 | goto bb_1
2348 | end_bb_4
2350 and finally combine_blocks removes the basic block boundaries making
2351 the loop vectorizable:
2353 | bb_0
2354 | i = 0
2355 | if (i < N) goto bb_5 else goto bb_1
2356 | end_bb_0
2358 | bb_1
2359 | cond = some_computation;
2360 | A[i] = cond ? expr : A[i];
2361 | if (i < N) goto bb_5 else goto bb_4
2362 | end_bb_1
2364 | bb_4
2365 | goto bb_1
2366 | end_bb_4
2369 static void
2370 predicate_statements (loop_p loop)
2372 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2373 auto_vec<int, 1> vect_sizes;
2374 auto_vec<tree, 1> vect_masks;
2375 hash_set<tree_ssa_name_hash> ssa_names;
2377 for (i = 1; i < orig_loop_num_nodes; i++)
2379 gimple_stmt_iterator gsi;
2380 basic_block bb = ifc_bbs[i];
2381 tree cond = bb_predicate (bb);
2382 bool swap;
2383 int index;
2385 if (is_true_predicate (cond))
2386 continue;
2388 swap = false;
2389 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2391 swap = true;
2392 cond = TREE_OPERAND (cond, 0);
2395 vect_sizes.truncate (0);
2396 vect_masks.truncate (0);
2398 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2400 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
2401 if (!stmt)
2403 else if (is_false_predicate (cond)
2404 && gimple_vdef (stmt))
2406 unlink_stmt_vdef (stmt);
2407 gsi_remove (&gsi, true);
2408 release_defs (stmt);
2409 continue;
2411 else if (gimple_plf (stmt, GF_PLF_2))
2413 tree lhs = gimple_assign_lhs (stmt);
2414 tree mask;
2415 gimple *new_stmt;
2416 gimple_seq stmts = NULL;
2417 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2418 /* We checked before setting GF_PLF_2 that an equivalent
2419 integer mode exists. */
2420 int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
2421 if (!vect_sizes.is_empty ()
2422 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2423 /* Use created mask. */
2424 mask = vect_masks[index];
2425 else
2427 if (COMPARISON_CLASS_P (cond))
2428 mask = gimple_build (&stmts, TREE_CODE (cond),
2429 boolean_type_node,
2430 TREE_OPERAND (cond, 0),
2431 TREE_OPERAND (cond, 1));
2432 else
2433 mask = cond;
2435 if (swap)
2437 tree true_val
2438 = constant_boolean_node (true, TREE_TYPE (mask));
2439 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2440 TREE_TYPE (mask), mask, true_val);
2442 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2444 /* Save mask and its size for further use. */
2445 vect_sizes.safe_push (bitsize);
2446 vect_masks.safe_push (mask);
2448 if (gimple_assign_single_p (stmt))
2449 new_stmt = predicate_load_or_store (&gsi, stmt, mask);
2450 else
2451 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
2453 gsi_replace (&gsi, new_stmt, true);
2455 else if (gimple_vdef (stmt))
2457 tree lhs = gimple_assign_lhs (stmt);
2458 tree rhs = gimple_assign_rhs1 (stmt);
2459 tree type = TREE_TYPE (lhs);
2461 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2462 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2463 if (swap)
2464 std::swap (lhs, rhs);
2465 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2466 is_gimple_condexpr, NULL_TREE,
2467 true, GSI_SAME_STMT);
2468 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2469 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2470 update_stmt (stmt);
2472 tree lhs = gimple_get_lhs (gsi_stmt (gsi));
2473 if (lhs && TREE_CODE (lhs) == SSA_NAME)
2474 ssa_names.add (lhs);
2475 gsi_next (&gsi);
2477 ssa_names.empty ();
2481 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2482 other than the exit and latch of the LOOP. Also resets the
2483 GIMPLE_DEBUG information. */
2485 static void
2486 remove_conditions_and_labels (loop_p loop)
2488 gimple_stmt_iterator gsi;
2489 unsigned int i;
2491 for (i = 0; i < loop->num_nodes; i++)
2493 basic_block bb = ifc_bbs[i];
2495 if (bb_with_exit_edge_p (loop, bb)
2496 || bb == loop->latch)
2497 continue;
2499 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2500 switch (gimple_code (gsi_stmt (gsi)))
2502 case GIMPLE_COND:
2503 case GIMPLE_LABEL:
2504 gsi_remove (&gsi, true);
2505 break;
2507 case GIMPLE_DEBUG:
2508 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2509 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2511 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2512 update_stmt (gsi_stmt (gsi));
2514 gsi_next (&gsi);
2515 break;
2517 default:
2518 gsi_next (&gsi);
2523 /* Combine all the basic blocks from LOOP into one or two super basic
2524 blocks. Replace PHI nodes with conditional modify expressions. */
2526 static void
2527 combine_blocks (struct loop *loop)
2529 basic_block bb, exit_bb, merge_target_bb;
2530 unsigned int orig_loop_num_nodes = loop->num_nodes;
2531 unsigned int i;
2532 edge e;
2533 edge_iterator ei;
2535 remove_conditions_and_labels (loop);
2536 insert_gimplified_predicates (loop);
2537 predicate_all_scalar_phis (loop);
2539 if (need_to_predicate)
2540 predicate_statements (loop);
2542 /* Merge basic blocks: first remove all the edges in the loop,
2543 except for those from the exit block. */
2544 exit_bb = NULL;
2545 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2546 for (i = 0; i < orig_loop_num_nodes; i++)
2548 bb = ifc_bbs[i];
2549 predicated[i] = !is_true_predicate (bb_predicate (bb));
2550 free_bb_predicate (bb);
2551 if (bb_with_exit_edge_p (loop, bb))
2553 gcc_assert (exit_bb == NULL);
2554 exit_bb = bb;
2557 gcc_assert (exit_bb != loop->latch);
2559 for (i = 1; i < orig_loop_num_nodes; i++)
2561 bb = ifc_bbs[i];
2563 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2565 if (e->src == exit_bb)
2566 ei_next (&ei);
2567 else
2568 remove_edge (e);
2572 if (exit_bb != NULL)
2574 if (exit_bb != loop->header)
2576 /* Connect this node to loop header. */
2577 make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2578 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2581 /* Redirect non-exit edges to loop->latch. */
2582 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2584 if (!loop_exit_edge_p (loop, e))
2585 redirect_edge_and_branch (e, loop->latch);
2587 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2589 else
2591 /* If the loop does not have an exit, reconnect header and latch. */
2592 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2593 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2596 merge_target_bb = loop->header;
2598 /* Get at the virtual def valid for uses starting at the first block
2599 we merge into the header. Without a virtual PHI the loop has the
2600 same virtual use on all stmts. */
2601 gphi *vphi = get_virtual_phi (loop->header);
2602 tree last_vdef = NULL_TREE;
2603 if (vphi)
2605 last_vdef = gimple_phi_result (vphi);
2606 for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
2607 ! gsi_end_p (gsi); gsi_next (&gsi))
2608 if (gimple_vdef (gsi_stmt (gsi)))
2609 last_vdef = gimple_vdef (gsi_stmt (gsi));
2611 for (i = 1; i < orig_loop_num_nodes; i++)
2613 gimple_stmt_iterator gsi;
2614 gimple_stmt_iterator last;
2616 bb = ifc_bbs[i];
2618 if (bb == exit_bb || bb == loop->latch)
2619 continue;
2621 /* We release virtual PHIs late because we have to propagate them
2622 out using the current VUSE. The def might be the one used
2623 after the loop. */
2624 vphi = get_virtual_phi (bb);
2625 if (vphi)
2627 imm_use_iterator iter;
2628 use_operand_p use_p;
2629 gimple *use_stmt;
2630 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2632 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2633 SET_USE (use_p, last_vdef);
2635 gsi = gsi_for_stmt (vphi);
2636 remove_phi_node (&gsi, true);
2639 /* Make stmts member of loop->header and clear range info from all stmts
2640 in BB which is now no longer executed conditional on a predicate we
2641 could have derived it from. */
2642 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2644 gimple *stmt = gsi_stmt (gsi);
2645 gimple_set_bb (stmt, merge_target_bb);
2646 /* Update virtual operands. */
2647 if (last_vdef)
2649 use_operand_p use_p = ssa_vuse_operand (stmt);
2650 if (use_p
2651 && USE_FROM_PTR (use_p) != last_vdef)
2652 SET_USE (use_p, last_vdef);
2653 if (gimple_vdef (stmt))
2654 last_vdef = gimple_vdef (stmt);
2656 if (predicated[i])
2658 ssa_op_iter i;
2659 tree op;
2660 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2661 reset_flow_sensitive_info (op);
2665 /* Update stmt list. */
2666 last = gsi_last_bb (merge_target_bb);
2667 gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
2668 set_bb_seq (bb, NULL);
2670 delete_basic_block (bb);
2673 /* If possible, merge loop header to the block with the exit edge.
2674 This reduces the number of basic blocks to two, to please the
2675 vectorizer that handles only loops with two nodes. */
2676 if (exit_bb
2677 && exit_bb != loop->header)
2679 /* We release virtual PHIs late because we have to propagate them
2680 out using the current VUSE. The def might be the one used
2681 after the loop. */
2682 vphi = get_virtual_phi (exit_bb);
2683 if (vphi)
2685 imm_use_iterator iter;
2686 use_operand_p use_p;
2687 gimple *use_stmt;
2688 FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
2690 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2691 SET_USE (use_p, last_vdef);
2693 gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
2694 remove_phi_node (&gsi, true);
2697 if (can_merge_blocks_p (loop->header, exit_bb))
2698 merge_blocks (loop->header, exit_bb);
2701 free (ifc_bbs);
2702 ifc_bbs = NULL;
2703 free (predicated);
2706 /* Version LOOP before if-converting it; the original loop
2707 will be if-converted, the new copy of the loop will not,
2708 and the LOOP_VECTORIZED internal call will be guarding which
2709 loop to execute. The vectorizer pass will fold this
2710 internal call into either true or false.
2712 Note that this function intentionally invalidates profile. Both edges
2713 out of LOOP_VECTORIZED must have 100% probability so the profile remains
2714 consistent after the condition is folded in the vectorizer. */
2716 static struct loop *
2717 version_loop_for_if_conversion (struct loop *loop)
2719 basic_block cond_bb;
2720 tree cond = make_ssa_name (boolean_type_node);
2721 struct loop *new_loop;
2722 gimple *g;
2723 gimple_stmt_iterator gsi;
2724 unsigned int save_length;
2726 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2727 build_int_cst (integer_type_node, loop->num),
2728 integer_zero_node);
2729 gimple_call_set_lhs (g, cond);
2731 /* Save BB->aux around loop_version as that uses the same field. */
2732 save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
2733 void **saved_preds = XALLOCAVEC (void *, save_length);
2734 for (unsigned i = 0; i < save_length; i++)
2735 saved_preds[i] = ifc_bbs[i]->aux;
2737 initialize_original_copy_tables ();
2738 /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
2739 is re-merged in the vectorizer. */
2740 new_loop = loop_version (loop, cond, &cond_bb,
2741 profile_probability::always (),
2742 profile_probability::always (),
2743 profile_probability::always (),
2744 profile_probability::always (), true);
2745 free_original_copy_tables ();
2747 for (unsigned i = 0; i < save_length; i++)
2748 ifc_bbs[i]->aux = saved_preds[i];
2750 if (new_loop == NULL)
2751 return NULL;
2753 new_loop->dont_vectorize = true;
2754 new_loop->force_vectorize = false;
2755 gsi = gsi_last_bb (cond_bb);
2756 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2757 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2758 update_ssa (TODO_update_ssa);
2759 return new_loop;
2762 /* Return true when LOOP satisfies the follow conditions that will
2763 allow it to be recognized by the vectorizer for outer-loop
2764 vectorization:
2765 - The loop is not the root node of the loop tree.
2766 - The loop has exactly one inner loop.
2767 - The loop has a single exit.
2768 - The loop header has a single successor, which is the inner
2769 loop header.
2770 - Each of the inner and outer loop latches have a single
2771 predecessor.
2772 - The loop exit block has a single predecessor, which is the
2773 inner loop's exit block. */
2775 static bool
2776 versionable_outer_loop_p (struct loop *loop)
2778 if (!loop_outer (loop)
2779 || loop->dont_vectorize
2780 || !loop->inner
2781 || loop->inner->next
2782 || !single_exit (loop)
2783 || !single_succ_p (loop->header)
2784 || single_succ (loop->header) != loop->inner->header
2785 || !single_pred_p (loop->latch)
2786 || !single_pred_p (loop->inner->latch))
2787 return false;
2789 basic_block outer_exit = single_pred (loop->latch);
2790 basic_block inner_exit = single_pred (loop->inner->latch);
2792 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2793 return false;
2795 if (dump_file)
2796 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2798 return true;
2801 /* Performs splitting of critical edges. Skip splitting and return false
2802 if LOOP will not be converted because:
2804 - LOOP is not well formed.
2805 - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2807 Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
2809 static bool
2810 ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
2812 basic_block *body;
2813 basic_block bb;
2814 unsigned int num = loop->num_nodes;
2815 unsigned int i;
2816 gimple *stmt;
2817 edge e;
2818 edge_iterator ei;
2819 auto_vec<edge> critical_edges;
2821 /* Loop is not well formed. */
2822 if (num <= 2 || loop->inner || !single_exit (loop))
2823 return false;
2825 body = get_loop_body (loop);
2826 for (i = 0; i < num; i++)
2828 bb = body[i];
2829 if (!aggressive_if_conv
2830 && phi_nodes (bb)
2831 && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
2833 if (dump_file && (dump_flags & TDF_DETAILS))
2834 fprintf (dump_file,
2835 "BB %d has complicated PHI with more than %u args.\n",
2836 bb->index, MAX_PHI_ARG_NUM);
2838 free (body);
2839 return false;
2841 if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
2842 continue;
2844 stmt = last_stmt (bb);
2845 /* Skip basic blocks not ending with conditional branch. */
2846 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2847 continue;
2849 FOR_EACH_EDGE (e, ei, bb->succs)
2850 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2851 critical_edges.safe_push (e);
2853 free (body);
2855 while (critical_edges.length () > 0)
2857 e = critical_edges.pop ();
2858 /* Don't split if bb can be predicated along non-critical edge. */
2859 if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
2860 split_edge (e);
2863 return true;
2866 /* Delete redundant statements produced by predication which prevents
2867 loop vectorization. */
2869 static void
2870 ifcvt_local_dce (basic_block bb)
2872 gimple *stmt;
2873 gimple *stmt1;
2874 gimple *phi;
2875 gimple_stmt_iterator gsi;
2876 auto_vec<gimple *> worklist;
2877 enum gimple_code code;
2878 use_operand_p use_p;
2879 imm_use_iterator imm_iter;
2880 std::pair <tree, tree> *name_pair;
2881 unsigned int i;
2883 FOR_EACH_VEC_ELT (redundant_ssa_names, i, name_pair)
2884 replace_uses_by (name_pair->first, name_pair->second);
2885 redundant_ssa_names.release ();
2887 worklist.create (64);
2888 /* Consider all phi as live statements. */
2889 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2891 phi = gsi_stmt (gsi);
2892 gimple_set_plf (phi, GF_PLF_2, true);
2893 worklist.safe_push (phi);
2895 /* Consider load/store statements, CALL and COND as live. */
2896 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2898 stmt = gsi_stmt (gsi);
2899 if (gimple_store_p (stmt)
2900 || gimple_assign_load_p (stmt)
2901 || is_gimple_debug (stmt))
2903 gimple_set_plf (stmt, GF_PLF_2, true);
2904 worklist.safe_push (stmt);
2905 continue;
2907 code = gimple_code (stmt);
2908 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2910 gimple_set_plf (stmt, GF_PLF_2, true);
2911 worklist.safe_push (stmt);
2912 continue;
2914 gimple_set_plf (stmt, GF_PLF_2, false);
2916 if (code == GIMPLE_ASSIGN)
2918 tree lhs = gimple_assign_lhs (stmt);
2919 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2921 stmt1 = USE_STMT (use_p);
2922 if (gimple_bb (stmt1) != bb)
2924 gimple_set_plf (stmt, GF_PLF_2, true);
2925 worklist.safe_push (stmt);
2926 break;
2931 /* Propagate liveness through arguments of live stmt. */
2932 while (worklist.length () > 0)
2934 ssa_op_iter iter;
2935 use_operand_p use_p;
2936 tree use;
2938 stmt = worklist.pop ();
2939 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2941 use = USE_FROM_PTR (use_p);
2942 if (TREE_CODE (use) != SSA_NAME)
2943 continue;
2944 stmt1 = SSA_NAME_DEF_STMT (use);
2945 if (gimple_bb (stmt1) != bb
2946 || gimple_plf (stmt1, GF_PLF_2))
2947 continue;
2948 gimple_set_plf (stmt1, GF_PLF_2, true);
2949 worklist.safe_push (stmt1);
2952 /* Delete dead statements. */
2953 gsi = gsi_start_bb (bb);
2954 while (!gsi_end_p (gsi))
2956 stmt = gsi_stmt (gsi);
2957 if (gimple_plf (stmt, GF_PLF_2))
2959 gsi_next (&gsi);
2960 continue;
2962 if (dump_file && (dump_flags & TDF_DETAILS))
2964 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2965 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2967 gsi_remove (&gsi, true);
2968 release_defs (stmt);
2972 /* If-convert LOOP when it is legal. For the moment this pass has no
2973 profitability analysis. Returns non-zero todo flags when something
2974 changed. */
2976 unsigned int
2977 tree_if_conversion (struct loop *loop)
2979 unsigned int todo = 0;
2980 bool aggressive_if_conv;
2981 struct loop *rloop;
2983 again:
2984 rloop = NULL;
2985 ifc_bbs = NULL;
2986 need_to_predicate = false;
2987 any_complicated_phi = false;
2989 /* Apply more aggressive if-conversion when loop or its outer loop were
2990 marked with simd pragma. When that's the case, we try to if-convert
2991 loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
2992 aggressive_if_conv = loop->force_vectorize;
2993 if (!aggressive_if_conv)
2995 struct loop *outer_loop = loop_outer (loop);
2996 if (outer_loop && outer_loop->force_vectorize)
2997 aggressive_if_conv = true;
3000 if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
3001 goto cleanup;
3003 if (!if_convertible_loop_p (loop)
3004 || !dbg_cnt (if_conversion_tree))
3005 goto cleanup;
3007 if ((need_to_predicate || any_complicated_phi)
3008 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
3009 || loop->dont_vectorize))
3010 goto cleanup;
3012 /* Since we have no cost model, always version loops unless the user
3013 specified -ftree-loop-if-convert or unless versioning is required.
3014 Either version this loop, or if the pattern is right for outer-loop
3015 vectorization, version the outer loop. In the latter case we will
3016 still if-convert the original inner loop. */
3017 if (need_to_predicate
3018 || any_complicated_phi
3019 || flag_tree_loop_if_convert != 1)
3021 struct loop *vloop
3022 = (versionable_outer_loop_p (loop_outer (loop))
3023 ? loop_outer (loop) : loop);
3024 struct loop *nloop = version_loop_for_if_conversion (vloop);
3025 if (nloop == NULL)
3026 goto cleanup;
3027 if (vloop != loop)
3029 /* If versionable_outer_loop_p decided to version the
3030 outer loop, version also the inner loop of the non-vectorized
3031 loop copy. So we transform:
3032 loop1
3033 loop2
3034 into:
3035 if (LOOP_VECTORIZED (1, 3))
3037 loop1
3038 loop2
3040 else
3041 loop3 (copy of loop1)
3042 if (LOOP_VECTORIZED (4, 5))
3043 loop4 (copy of loop2)
3044 else
3045 loop5 (copy of loop4) */
3046 gcc_assert (nloop->inner && nloop->inner->next == NULL);
3047 rloop = nloop->inner;
3051 /* Now all statements are if-convertible. Combine all the basic
3052 blocks into one huge basic block doing the if-conversion
3053 on-the-fly. */
3054 combine_blocks (loop);
3056 /* Delete dead predicate computations. */
3057 ifcvt_local_dce (loop->header);
3059 todo |= TODO_cleanup_cfg;
3061 cleanup:
3062 if (ifc_bbs)
3064 unsigned int i;
3066 for (i = 0; i < loop->num_nodes; i++)
3067 free_bb_predicate (ifc_bbs[i]);
3069 free (ifc_bbs);
3070 ifc_bbs = NULL;
3072 if (rloop != NULL)
3074 loop = rloop;
3075 goto again;
3078 return todo;
3081 /* Tree if-conversion pass management. */
3083 namespace {
3085 const pass_data pass_data_if_conversion =
3087 GIMPLE_PASS, /* type */
3088 "ifcvt", /* name */
3089 OPTGROUP_NONE, /* optinfo_flags */
3090 TV_TREE_LOOP_IFCVT, /* tv_id */
3091 ( PROP_cfg | PROP_ssa ), /* properties_required */
3092 0, /* properties_provided */
3093 0, /* properties_destroyed */
3094 0, /* todo_flags_start */
3095 0, /* todo_flags_finish */
3098 class pass_if_conversion : public gimple_opt_pass
3100 public:
3101 pass_if_conversion (gcc::context *ctxt)
3102 : gimple_opt_pass (pass_data_if_conversion, ctxt)
3105 /* opt_pass methods: */
3106 virtual bool gate (function *);
3107 virtual unsigned int execute (function *);
3109 }; // class pass_if_conversion
3111 bool
3112 pass_if_conversion::gate (function *fun)
3114 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3115 && flag_tree_loop_if_convert != 0)
3116 || flag_tree_loop_if_convert == 1);
3119 unsigned int
3120 pass_if_conversion::execute (function *fun)
3122 struct loop *loop;
3123 unsigned todo = 0;
3125 if (number_of_loops (fun) <= 1)
3126 return 0;
3128 FOR_EACH_LOOP (loop, 0)
3129 if (flag_tree_loop_if_convert == 1
3130 || ((flag_tree_loop_vectorize || loop->force_vectorize)
3131 && !loop->dont_vectorize))
3132 todo |= tree_if_conversion (loop);
3134 if (todo)
3136 free_numbers_of_iterations_estimates (fun);
3137 scev_reset ();
3140 if (flag_checking)
3142 basic_block bb;
3143 FOR_EACH_BB_FN (bb, fun)
3144 gcc_assert (!bb->aux);
3147 return todo;
3150 } // anon namespace
3152 gimple_opt_pass *
3153 make_pass_if_conversion (gcc::context *ctxt)
3155 return new pass_if_conversion (ctxt);