S/390: Testsuite: Add asm scan patterns for -m31.
[official-gcc.git] / gcc / tree-if-conv.c
blobf43942ddff0a842e520587e01c2e0e50e6649bd9
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop-ivopts.h"
110 #include "tree-ssa-address.h"
111 #include "dbgcnt.h"
112 #include "tree-hash-traits.h"
113 #include "varasm.h"
115 /* List of basic blocks in if-conversion-suitable order. */
116 static basic_block *ifc_bbs;
118 /* Apply more aggressive (extended) if-conversion if true. */
119 static bool aggressive_if_conv;
121 /* Hash table to store references, DR pairs. */
122 static hash_map<tree_operand_hash, data_reference_p> *ref_DR_map;
124 /* Hash table to store base reference, DR pairs. */
125 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
127 /* Structure used to predicate basic blocks. This is attached to the
128 ->aux field of the BBs in the loop to be if-converted. */
129 struct bb_predicate {
131 /* The condition under which this basic block is executed. */
132 tree predicate;
134 /* PREDICATE is gimplified, and the sequence of statements is
135 recorded here, in order to avoid the duplication of computations
136 that occur in previous conditions. See PR44483. */
137 gimple_seq predicate_gimplified_stmts;
140 /* Returns true when the basic block BB has a predicate. */
142 static inline bool
143 bb_has_predicate (basic_block bb)
145 return bb->aux != NULL;
148 /* Returns the gimplified predicate for basic block BB. */
150 static inline tree
151 bb_predicate (basic_block bb)
153 return ((struct bb_predicate *) bb->aux)->predicate;
156 /* Sets the gimplified predicate COND for basic block BB. */
158 static inline void
159 set_bb_predicate (basic_block bb, tree cond)
161 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
162 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
163 || is_gimple_condexpr (cond));
164 ((struct bb_predicate *) bb->aux)->predicate = cond;
167 /* Returns the sequence of statements of the gimplification of the
168 predicate for basic block BB. */
170 static inline gimple_seq
171 bb_predicate_gimplified_stmts (basic_block bb)
173 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
176 /* Sets the sequence of statements STMTS of the gimplification of the
177 predicate for basic block BB. */
179 static inline void
180 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
182 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
185 /* Adds the sequence of statements STMTS to the sequence of statements
186 of the predicate for basic block BB. */
188 static inline void
189 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
191 gimple_seq_add_seq
192 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
195 /* Initializes to TRUE the predicate of basic block BB. */
197 static inline void
198 init_bb_predicate (basic_block bb)
200 bb->aux = XNEW (struct bb_predicate);
201 set_bb_predicate_gimplified_stmts (bb, NULL);
202 set_bb_predicate (bb, boolean_true_node);
205 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
206 but don't actually free it. */
208 static inline void
209 release_bb_predicate (basic_block bb)
211 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
212 if (stmts)
214 gimple_stmt_iterator i;
216 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
217 free_stmt_operands (cfun, gsi_stmt (i));
218 set_bb_predicate_gimplified_stmts (bb, NULL);
222 /* Free the predicate of basic block BB. */
224 static inline void
225 free_bb_predicate (basic_block bb)
227 if (!bb_has_predicate (bb))
228 return;
230 release_bb_predicate (bb);
231 free (bb->aux);
232 bb->aux = NULL;
235 /* Reinitialize predicate of BB with the true predicate. */
237 static inline void
238 reset_bb_predicate (basic_block bb)
240 if (!bb_has_predicate (bb))
241 init_bb_predicate (bb);
242 else
244 release_bb_predicate (bb);
245 set_bb_predicate (bb, boolean_true_node);
249 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
250 the expression EXPR. Inserts the statement created for this
251 computation before GSI and leaves the iterator GSI at the same
252 statement. */
254 static tree
255 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
257 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
258 gimple *stmt = gimple_build_assign (new_name, expr);
259 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
260 return new_name;
263 /* Return true when COND is a true predicate. */
265 static inline bool
266 is_true_predicate (tree cond)
268 return (cond == NULL_TREE
269 || cond == boolean_true_node
270 || integer_onep (cond));
273 /* Returns true when BB has a predicate that is not trivial: true or
274 NULL_TREE. */
276 static inline bool
277 is_predicated (basic_block bb)
279 return !is_true_predicate (bb_predicate (bb));
282 /* Parses the predicate COND and returns its comparison code and
283 operands OP0 and OP1. */
285 static enum tree_code
286 parse_predicate (tree cond, tree *op0, tree *op1)
288 gimple *s;
290 if (TREE_CODE (cond) == SSA_NAME
291 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
293 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
295 *op0 = gimple_assign_rhs1 (s);
296 *op1 = gimple_assign_rhs2 (s);
297 return gimple_assign_rhs_code (s);
300 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
302 tree op = gimple_assign_rhs1 (s);
303 tree type = TREE_TYPE (op);
304 enum tree_code code = parse_predicate (op, op0, op1);
306 return code == ERROR_MARK ? ERROR_MARK
307 : invert_tree_comparison (code, HONOR_NANS (type));
310 return ERROR_MARK;
313 if (COMPARISON_CLASS_P (cond))
315 *op0 = TREE_OPERAND (cond, 0);
316 *op1 = TREE_OPERAND (cond, 1);
317 return TREE_CODE (cond);
320 return ERROR_MARK;
323 /* Returns the fold of predicate C1 OR C2 at location LOC. */
325 static tree
326 fold_or_predicates (location_t loc, tree c1, tree c2)
328 tree op1a, op1b, op2a, op2b;
329 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
330 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
332 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
334 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
335 code2, op2a, op2b);
336 if (t)
337 return t;
340 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
343 /* Returns true if N is either a constant or a SSA_NAME. */
345 static bool
346 constant_or_ssa_name (tree n)
348 switch (TREE_CODE (n))
350 case SSA_NAME:
351 case INTEGER_CST:
352 case REAL_CST:
353 case COMPLEX_CST:
354 case VECTOR_CST:
355 return true;
356 default:
357 return false;
361 /* Returns either a COND_EXPR or the folded expression if the folded
362 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
363 a constant or a SSA_NAME. */
365 static tree
366 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
368 tree rhs1, lhs1, cond_expr;
370 /* If COND is comparison r != 0 and r has boolean type, convert COND
371 to SSA_NAME to accept by vect bool pattern. */
372 if (TREE_CODE (cond) == NE_EXPR)
374 tree op0 = TREE_OPERAND (cond, 0);
375 tree op1 = TREE_OPERAND (cond, 1);
376 if (TREE_CODE (op0) == SSA_NAME
377 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
378 && (integer_zerop (op1)))
379 cond = op0;
381 cond_expr = fold_ternary (COND_EXPR, type, cond,
382 rhs, lhs);
384 if (cond_expr == NULL_TREE)
385 return build3 (COND_EXPR, type, cond, rhs, lhs);
387 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
389 if (constant_or_ssa_name (cond_expr))
390 return cond_expr;
392 if (TREE_CODE (cond_expr) == ABS_EXPR)
394 rhs1 = TREE_OPERAND (cond_expr, 1);
395 STRIP_USELESS_TYPE_CONVERSION (rhs1);
396 if (constant_or_ssa_name (rhs1))
397 return build1 (ABS_EXPR, type, rhs1);
400 if (TREE_CODE (cond_expr) == MIN_EXPR
401 || TREE_CODE (cond_expr) == MAX_EXPR)
403 lhs1 = TREE_OPERAND (cond_expr, 0);
404 STRIP_USELESS_TYPE_CONVERSION (lhs1);
405 rhs1 = TREE_OPERAND (cond_expr, 1);
406 STRIP_USELESS_TYPE_CONVERSION (rhs1);
407 if (constant_or_ssa_name (rhs1)
408 && constant_or_ssa_name (lhs1))
409 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
411 return build3 (COND_EXPR, type, cond, rhs, lhs);
414 /* Add condition NC to the predicate list of basic block BB. LOOP is
415 the loop to be if-converted. Use predicate of cd-equivalent block
416 for join bb if it exists: we call basic blocks bb1 and bb2
417 cd-equivalent if they are executed under the same condition. */
419 static inline void
420 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
422 tree bc, *tp;
423 basic_block dom_bb;
425 if (is_true_predicate (nc))
426 return;
428 /* If dominance tells us this basic block is always executed,
429 don't record any predicates for it. */
430 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
431 return;
433 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
434 /* We use notion of cd equivalence to get simpler predicate for
435 join block, e.g. if join block has 2 predecessors with predicates
436 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
437 p1 & p2 | p1 & !p2. */
438 if (dom_bb != loop->header
439 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
441 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
442 bc = bb_predicate (dom_bb);
443 if (!is_true_predicate (bc))
444 set_bb_predicate (bb, bc);
445 else
446 gcc_assert (is_true_predicate (bb_predicate (bb)));
447 if (dump_file && (dump_flags & TDF_DETAILS))
448 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
449 dom_bb->index, bb->index);
450 return;
453 if (!is_predicated (bb))
454 bc = nc;
455 else
457 bc = bb_predicate (bb);
458 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
459 if (is_true_predicate (bc))
461 reset_bb_predicate (bb);
462 return;
466 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
467 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
468 tp = &TREE_OPERAND (bc, 0);
469 else
470 tp = &bc;
471 if (!is_gimple_condexpr (*tp))
473 gimple_seq stmts;
474 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
475 add_bb_predicate_gimplified_stmts (bb, stmts);
477 set_bb_predicate (bb, bc);
480 /* Add the condition COND to the previous condition PREV_COND, and add
481 this to the predicate list of the destination of edge E. LOOP is
482 the loop to be if-converted. */
484 static void
485 add_to_dst_predicate_list (struct loop *loop, edge e,
486 tree prev_cond, tree cond)
488 if (!flow_bb_inside_loop_p (loop, e->dest))
489 return;
491 if (!is_true_predicate (prev_cond))
492 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
493 prev_cond, cond);
495 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
496 add_to_predicate_list (loop, e->dest, cond);
499 /* Return true if one of the successor edges of BB exits LOOP. */
501 static bool
502 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
504 edge e;
505 edge_iterator ei;
507 FOR_EACH_EDGE (e, ei, bb->succs)
508 if (loop_exit_edge_p (loop, e))
509 return true;
511 return false;
514 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
515 and it belongs to basic block BB.
517 PHI is not if-convertible if:
518 - it has more than 2 arguments.
520 When the flag_tree_loop_if_convert_stores is not set, PHI is not
521 if-convertible if:
522 - a virtual PHI is immediately used in another PHI node,
523 - there is a virtual PHI in a BB other than the loop->header.
524 When the aggressive_if_conv is set, PHI can have more than
525 two arguments. */
527 static bool
528 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
529 bool any_mask_load_store)
531 if (dump_file && (dump_flags & TDF_DETAILS))
533 fprintf (dump_file, "-------------------------\n");
534 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
537 if (bb != loop->header)
539 if (gimple_phi_num_args (phi) != 2
540 && !aggressive_if_conv)
542 if (dump_file && (dump_flags & TDF_DETAILS))
543 fprintf (dump_file, "More than two phi node args.\n");
544 return false;
548 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
549 return true;
551 /* When the flag_tree_loop_if_convert_stores is not set, check
552 that there are no memory writes in the branches of the loop to be
553 if-converted. */
554 if (virtual_operand_p (gimple_phi_result (phi)))
556 imm_use_iterator imm_iter;
557 use_operand_p use_p;
559 if (bb != loop->header)
561 if (dump_file && (dump_flags & TDF_DETAILS))
562 fprintf (dump_file, "Virtual phi not on loop->header.\n");
563 return false;
566 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
568 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
569 && USE_STMT (use_p) != phi)
571 if (dump_file && (dump_flags & TDF_DETAILS))
572 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
573 return false;
578 return true;
581 /* Records the status of a data reference. This struct is attached to
582 each DR->aux field. */
584 struct ifc_dr {
585 /* -1 when not initialized, 0 when false, 1 when true. */
586 int written_at_least_once;
588 /* -1 when not initialized, 0 when false, 1 when true. */
589 int rw_unconditionally;
591 tree predicate;
593 tree base_predicate;
596 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
597 #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
598 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
600 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
601 HASH tables. While storing them in HASH table, it checks if the
602 reference is unconditionally read or written and stores that as a flag
603 information. For base reference it checks if it is written atlest once
604 unconditionally and stores it as flag information along with DR.
605 In other words for every data reference A in STMT there exist other
606 accesses to a data reference with the same base with predicates that
607 add up (OR-up) to the true predicate: this ensures that the data
608 reference A is touched (read or written) on every iteration of the
609 if-converted loop. */
610 static void
611 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
614 data_reference_p *master_dr, *base_master_dr;
615 tree ref = DR_REF (a);
616 tree base_ref = DR_BASE_OBJECT (a);
617 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
618 bool exist1, exist2;
620 while (TREE_CODE (ref) == COMPONENT_REF
621 || TREE_CODE (ref) == IMAGPART_EXPR
622 || TREE_CODE (ref) == REALPART_EXPR)
623 ref = TREE_OPERAND (ref, 0);
625 master_dr = &ref_DR_map->get_or_insert (ref, &exist1);
627 if (!exist1)
629 IFC_DR (a)->predicate = ca;
630 *master_dr = a;
632 else
633 IFC_DR (*master_dr)->predicate
634 = fold_or_predicates
635 (EXPR_LOCATION (IFC_DR (*master_dr)->predicate),
636 ca, IFC_DR (*master_dr)->predicate);
638 if (is_true_predicate (IFC_DR (*master_dr)->predicate))
639 DR_RW_UNCONDITIONALLY (*master_dr) = 1;
641 if (DR_IS_WRITE (a))
643 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
645 if (!exist2)
647 IFC_DR (a)->base_predicate = ca;
648 *base_master_dr = a;
650 else
651 IFC_DR (*base_master_dr)->base_predicate
652 = fold_or_predicates
653 (EXPR_LOCATION (IFC_DR (*base_master_dr)->base_predicate),
654 ca, IFC_DR (*base_master_dr)->base_predicate);
656 if (is_true_predicate (IFC_DR (*base_master_dr)->base_predicate))
657 DR_WRITTEN_AT_LEAST_ONCE (*base_master_dr) = 1;
661 /* Return true when the memory references of STMT won't trap in the
662 if-converted code. There are two things that we have to check for:
664 - writes to memory occur to writable memory: if-conversion of
665 memory writes transforms the conditional memory writes into
666 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
667 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
668 be executed at all in the original code, it may be a readonly
669 memory. To check that A is not const-qualified, we check that
670 there exists at least an unconditional write to A in the current
671 function.
673 - reads or writes to memory are valid memory accesses for every
674 iteration. To check that the memory accesses are correctly formed
675 and that we are allowed to read and write in these locations, we
676 check that the memory accesses to be if-converted occur at every
677 iteration unconditionally.
679 Returns true for the memory reference in STMT, same memory reference
680 is read or written unconditionally atleast once and the base memory
681 reference is written unconditionally once. This is to check reference
682 will not write fault. Also retuns true if the memory reference is
683 unconditionally read once then we are conditionally writing to memory
684 which is defined as read and write and is bound to the definition
685 we are seeing. */
686 static bool
687 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
689 data_reference_p *master_dr, *base_master_dr;
690 data_reference_p a = drs[gimple_uid (stmt) - 1];
692 tree ref_base_a = DR_REF (a);
693 tree base = DR_BASE_OBJECT (a);
695 gcc_assert (DR_STMT (a) == stmt);
697 while (TREE_CODE (ref_base_a) == COMPONENT_REF
698 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
699 || TREE_CODE (ref_base_a) == REALPART_EXPR)
700 ref_base_a = TREE_OPERAND (ref_base_a, 0);
702 master_dr = ref_DR_map->get (ref_base_a);
703 base_master_dr = baseref_DR_map->get (base);
705 gcc_assert (master_dr != NULL);
707 if (DR_RW_UNCONDITIONALLY (*master_dr) == 1)
709 if (base_master_dr
710 && DR_WRITTEN_AT_LEAST_ONCE (*base_master_dr) == 1)
711 return true;
712 else
714 tree base_tree = get_base_address (DR_REF (a));
715 if (DECL_P (base_tree)
716 && decl_binds_to_current_def_p (base_tree)
717 && flag_tree_loop_if_convert_stores
718 && !TREE_READONLY (base_tree))
719 return true;
722 return false;
725 /* Wrapper around gimple_could_trap_p refined for the needs of the
726 if-conversion. Try to prove that the memory accesses of STMT could
727 not trap in the innermost loop containing STMT. */
729 static bool
730 ifcvt_could_trap_p (gimple *stmt, vec<data_reference_p> refs)
732 if (gimple_vuse (stmt)
733 && !gimple_could_trap_p_1 (stmt, false, false)
734 && ifcvt_memrefs_wont_trap (stmt, refs))
735 return false;
737 return gimple_could_trap_p (stmt);
740 /* Return true if STMT could be converted into a masked load or store
741 (conditional load or store based on a mask computed from bb predicate). */
743 static bool
744 ifcvt_can_use_mask_load_store (gimple *stmt)
746 tree lhs, ref;
747 machine_mode mode;
748 basic_block bb = gimple_bb (stmt);
749 bool is_load;
751 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
752 || bb->loop_father->dont_vectorize
753 || !gimple_assign_single_p (stmt)
754 || gimple_has_volatile_ops (stmt))
755 return false;
757 /* Check whether this is a load or store. */
758 lhs = gimple_assign_lhs (stmt);
759 if (gimple_store_p (stmt))
761 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
762 return false;
763 is_load = false;
764 ref = lhs;
766 else if (gimple_assign_load_p (stmt))
768 is_load = true;
769 ref = gimple_assign_rhs1 (stmt);
771 else
772 return false;
774 if (may_be_nonaddressable_p (ref))
775 return false;
777 /* Mask should be integer mode of the same size as the load/store
778 mode. */
779 mode = TYPE_MODE (TREE_TYPE (lhs));
780 if (int_mode_for_mode (mode) == BLKmode
781 || VECTOR_MODE_P (mode))
782 return false;
784 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
785 return true;
787 return false;
790 /* Return true when STMT is if-convertible.
792 GIMPLE_ASSIGN statement is not if-convertible if,
793 - it is not movable,
794 - it could trap,
795 - LHS is not var decl. */
797 static bool
798 if_convertible_gimple_assign_stmt_p (gimple *stmt,
799 vec<data_reference_p> refs,
800 bool *any_mask_load_store)
802 tree lhs = gimple_assign_lhs (stmt);
803 basic_block bb;
805 if (dump_file && (dump_flags & TDF_DETAILS))
807 fprintf (dump_file, "-------------------------\n");
808 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
811 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
812 return false;
814 /* Some of these constrains might be too conservative. */
815 if (stmt_ends_bb_p (stmt)
816 || gimple_has_volatile_ops (stmt)
817 || (TREE_CODE (lhs) == SSA_NAME
818 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
819 || gimple_has_side_effects (stmt))
821 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "stmt not suitable for ifcvt\n");
823 return false;
826 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
827 in between if_convertible_loop_p and combine_blocks
828 we can perform loop versioning. */
829 gimple_set_plf (stmt, GF_PLF_2, false);
831 if (flag_tree_loop_if_convert_stores)
833 if (ifcvt_could_trap_p (stmt, refs))
835 if (ifcvt_can_use_mask_load_store (stmt))
837 gimple_set_plf (stmt, GF_PLF_2, true);
838 *any_mask_load_store = true;
839 return true;
841 if (dump_file && (dump_flags & TDF_DETAILS))
842 fprintf (dump_file, "tree could trap...\n");
843 return false;
845 return true;
848 if (ifcvt_could_trap_p (stmt, refs))
850 if (ifcvt_can_use_mask_load_store (stmt))
852 gimple_set_plf (stmt, GF_PLF_2, true);
853 *any_mask_load_store = true;
854 return true;
856 if (dump_file && (dump_flags & TDF_DETAILS))
857 fprintf (dump_file, "tree could trap...\n");
858 return false;
861 bb = gimple_bb (stmt);
863 if (TREE_CODE (lhs) != SSA_NAME
864 && bb != bb->loop_father->header
865 && !bb_with_exit_edge_p (bb->loop_father, bb))
867 if (ifcvt_can_use_mask_load_store (stmt))
869 gimple_set_plf (stmt, GF_PLF_2, true);
870 *any_mask_load_store = true;
871 return true;
873 if (dump_file && (dump_flags & TDF_DETAILS))
875 fprintf (dump_file, "LHS is not var\n");
876 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
878 return false;
881 return true;
884 /* Return true when STMT is if-convertible.
886 A statement is if-convertible if:
887 - it is an if-convertible GIMPLE_ASSIGN,
888 - it is a GIMPLE_LABEL or a GIMPLE_COND,
889 - it is builtins call. */
891 static bool
892 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs,
893 bool *any_mask_load_store)
895 switch (gimple_code (stmt))
897 case GIMPLE_LABEL:
898 case GIMPLE_DEBUG:
899 case GIMPLE_COND:
900 return true;
902 case GIMPLE_ASSIGN:
903 return if_convertible_gimple_assign_stmt_p (stmt, refs,
904 any_mask_load_store);
906 case GIMPLE_CALL:
908 tree fndecl = gimple_call_fndecl (stmt);
909 if (fndecl)
911 int flags = gimple_call_flags (stmt);
912 if ((flags & ECF_CONST)
913 && !(flags & ECF_LOOPING_CONST_OR_PURE)
914 /* We can only vectorize some builtins at the moment,
915 so restrict if-conversion to those. */
916 && DECL_BUILT_IN (fndecl))
917 return true;
919 return false;
922 default:
923 /* Don't know what to do with 'em so don't do anything. */
924 if (dump_file && (dump_flags & TDF_DETAILS))
926 fprintf (dump_file, "don't know what to do\n");
927 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
929 return false;
930 break;
933 return true;
936 /* Assumes that BB has more than 1 predecessors.
937 Returns false if at least one successor is not on critical edge
938 and true otherwise. */
940 static inline bool
941 all_preds_critical_p (basic_block bb)
943 edge e;
944 edge_iterator ei;
946 FOR_EACH_EDGE (e, ei, bb->preds)
947 if (EDGE_COUNT (e->src->succs) == 1)
948 return false;
949 return true;
952 /* Returns true if at least one successor in on critical edge. */
953 static inline bool
954 has_pred_critical_p (basic_block bb)
956 edge e;
957 edge_iterator ei;
959 FOR_EACH_EDGE (e, ei, bb->preds)
960 if (EDGE_COUNT (e->src->succs) > 1)
961 return true;
962 return false;
965 /* Return true when BB is if-convertible. This routine does not check
966 basic block's statements and phis.
968 A basic block is not if-convertible if:
969 - it is non-empty and it is after the exit block (in BFS order),
970 - it is after the exit block but before the latch,
971 - its edges are not normal.
973 Last restriction is valid if aggressive_if_conv is false.
975 EXIT_BB is the basic block containing the exit of the LOOP. BB is
976 inside LOOP. */
978 static bool
979 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
981 edge e;
982 edge_iterator ei;
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
987 if (EDGE_COUNT (bb->succs) > 2)
988 return false;
990 if (EDGE_COUNT (bb->preds) > 2
991 && !aggressive_if_conv)
992 return false;
994 if (exit_bb)
996 if (bb != loop->latch)
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "basic block after exit bb but before latch\n");
1000 return false;
1002 else if (!empty_block_p (bb))
1004 if (dump_file && (dump_flags & TDF_DETAILS))
1005 fprintf (dump_file, "non empty basic block after exit bb\n");
1006 return false;
1008 else if (bb == loop->latch
1009 && bb != exit_bb
1010 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1012 if (dump_file && (dump_flags & TDF_DETAILS))
1013 fprintf (dump_file, "latch is not dominated by exit_block\n");
1014 return false;
1018 /* Be less adventurous and handle only normal edges. */
1019 FOR_EACH_EDGE (e, ei, bb->succs)
1020 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1022 if (dump_file && (dump_flags & TDF_DETAILS))
1023 fprintf (dump_file, "Difficult to handle edges\n");
1024 return false;
1027 /* At least one incoming edge has to be non-critical as otherwise edge
1028 predicates are not equal to basic-block predicates of the edge
1029 source. This check is skipped if aggressive_if_conv is true. */
1030 if (!aggressive_if_conv
1031 && EDGE_COUNT (bb->preds) > 1
1032 && bb != loop->header
1033 && all_preds_critical_p (bb))
1035 if (dump_file && (dump_flags & TDF_DETAILS))
1036 fprintf (dump_file, "only critical predecessors\n");
1037 return false;
1040 return true;
1043 /* Return true when all predecessor blocks of BB are visited. The
1044 VISITED bitmap keeps track of the visited blocks. */
1046 static bool
1047 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1049 edge e;
1050 edge_iterator ei;
1051 FOR_EACH_EDGE (e, ei, bb->preds)
1052 if (!bitmap_bit_p (*visited, e->src->index))
1053 return false;
1055 return true;
1058 /* Get body of a LOOP in suitable order for if-conversion. It is
1059 caller's responsibility to deallocate basic block list.
1060 If-conversion suitable order is, breadth first sort (BFS) order
1061 with an additional constraint: select a block only if all its
1062 predecessors are already selected. */
1064 static basic_block *
1065 get_loop_body_in_if_conv_order (const struct loop *loop)
1067 basic_block *blocks, *blocks_in_bfs_order;
1068 basic_block bb;
1069 bitmap visited;
1070 unsigned int index = 0;
1071 unsigned int visited_count = 0;
1073 gcc_assert (loop->num_nodes);
1074 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1076 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1077 visited = BITMAP_ALLOC (NULL);
1079 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1081 index = 0;
1082 while (index < loop->num_nodes)
1084 bb = blocks_in_bfs_order [index];
1086 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1088 free (blocks_in_bfs_order);
1089 BITMAP_FREE (visited);
1090 free (blocks);
1091 return NULL;
1094 if (!bitmap_bit_p (visited, bb->index))
1096 if (pred_blocks_visited_p (bb, &visited)
1097 || bb == loop->header)
1099 /* This block is now visited. */
1100 bitmap_set_bit (visited, bb->index);
1101 blocks[visited_count++] = bb;
1105 index++;
1107 if (index == loop->num_nodes
1108 && visited_count != loop->num_nodes)
1109 /* Not done yet. */
1110 index = 0;
1112 free (blocks_in_bfs_order);
1113 BITMAP_FREE (visited);
1114 return blocks;
1117 /* Returns true when the analysis of the predicates for all the basic
1118 blocks in LOOP succeeded.
1120 predicate_bbs first allocates the predicates of the basic blocks.
1121 These fields are then initialized with the tree expressions
1122 representing the predicates under which a basic block is executed
1123 in the LOOP. As the loop->header is executed at each iteration, it
1124 has the "true" predicate. Other statements executed under a
1125 condition are predicated with that condition, for example
1127 | if (x)
1128 | S1;
1129 | else
1130 | S2;
1132 S1 will be predicated with "x", and
1133 S2 will be predicated with "!x". */
1135 static void
1136 predicate_bbs (loop_p loop)
1138 unsigned int i;
1140 for (i = 0; i < loop->num_nodes; i++)
1141 init_bb_predicate (ifc_bbs[i]);
1143 for (i = 0; i < loop->num_nodes; i++)
1145 basic_block bb = ifc_bbs[i];
1146 tree cond;
1147 gimple *stmt;
1149 /* The loop latch and loop exit block are always executed and
1150 have no extra conditions to be processed: skip them. */
1151 if (bb == loop->latch
1152 || bb_with_exit_edge_p (loop, bb))
1154 reset_bb_predicate (bb);
1155 continue;
1158 cond = bb_predicate (bb);
1159 stmt = last_stmt (bb);
1160 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1162 tree c2;
1163 edge true_edge, false_edge;
1164 location_t loc = gimple_location (stmt);
1165 tree c = build2_loc (loc, gimple_cond_code (stmt),
1166 boolean_type_node,
1167 gimple_cond_lhs (stmt),
1168 gimple_cond_rhs (stmt));
1170 /* Add new condition into destination's predicate list. */
1171 extract_true_false_edges_from_block (gimple_bb (stmt),
1172 &true_edge, &false_edge);
1174 /* If C is true, then TRUE_EDGE is taken. */
1175 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1176 unshare_expr (c));
1178 /* If C is false, then FALSE_EDGE is taken. */
1179 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1180 unshare_expr (c));
1181 add_to_dst_predicate_list (loop, false_edge,
1182 unshare_expr (cond), c2);
1184 cond = NULL_TREE;
1187 /* If current bb has only one successor, then consider it as an
1188 unconditional goto. */
1189 if (single_succ_p (bb))
1191 basic_block bb_n = single_succ (bb);
1193 /* The successor bb inherits the predicate of its
1194 predecessor. If there is no predicate in the predecessor
1195 bb, then consider the successor bb as always executed. */
1196 if (cond == NULL_TREE)
1197 cond = boolean_true_node;
1199 add_to_predicate_list (loop, bb_n, cond);
1203 /* The loop header is always executed. */
1204 reset_bb_predicate (loop->header);
1205 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1206 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1209 /* Return true when LOOP is if-convertible. This is a helper function
1210 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1211 in if_convertible_loop_p. */
1213 static bool
1214 if_convertible_loop_p_1 (struct loop *loop,
1215 vec<loop_p> *loop_nest,
1216 vec<data_reference_p> *refs,
1217 vec<ddr_p> *ddrs, bool *any_mask_load_store)
1219 bool res;
1220 unsigned int i;
1221 basic_block exit_bb = NULL;
1223 /* Don't if-convert the loop when the data dependences cannot be
1224 computed: the loop won't be vectorized in that case. */
1225 res = compute_data_dependences_for_loop (loop, true, loop_nest, refs, ddrs);
1226 if (!res)
1227 return false;
1229 calculate_dominance_info (CDI_DOMINATORS);
1230 calculate_dominance_info (CDI_POST_DOMINATORS);
1232 /* Allow statements that can be handled during if-conversion. */
1233 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1234 if (!ifc_bbs)
1236 if (dump_file && (dump_flags & TDF_DETAILS))
1237 fprintf (dump_file, "Irreducible loop\n");
1238 return false;
1241 for (i = 0; i < loop->num_nodes; i++)
1243 basic_block bb = ifc_bbs[i];
1245 if (!if_convertible_bb_p (loop, bb, exit_bb))
1246 return false;
1248 if (bb_with_exit_edge_p (loop, bb))
1249 exit_bb = bb;
1252 for (i = 0; i < loop->num_nodes; i++)
1254 basic_block bb = ifc_bbs[i];
1255 gimple_stmt_iterator gsi;
1257 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1258 switch (gimple_code (gsi_stmt (gsi)))
1260 case GIMPLE_LABEL:
1261 case GIMPLE_ASSIGN:
1262 case GIMPLE_CALL:
1263 case GIMPLE_DEBUG:
1264 case GIMPLE_COND:
1265 gimple_set_uid (gsi_stmt (gsi), 0);
1266 break;
1267 default:
1268 return false;
1272 data_reference_p dr;
1274 ref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1275 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1277 predicate_bbs (loop);
1279 for (i = 0; refs->iterate (i, &dr); i++)
1281 dr->aux = XNEW (struct ifc_dr);
1282 DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
1283 DR_RW_UNCONDITIONALLY (dr) = -1;
1284 if (gimple_uid (DR_STMT (dr)) == 0)
1285 gimple_set_uid (DR_STMT (dr), i + 1);
1286 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1289 for (i = 0; i < loop->num_nodes; i++)
1291 basic_block bb = ifc_bbs[i];
1292 gimple_stmt_iterator itr;
1294 /* Check the if-convertibility of statements in predicated BBs. */
1295 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1296 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1297 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1298 any_mask_load_store))
1299 return false;
1302 for (i = 0; i < loop->num_nodes; i++)
1303 free_bb_predicate (ifc_bbs[i]);
1305 /* Checking PHIs needs to be done after stmts, as the fact whether there
1306 are any masked loads or stores affects the tests. */
1307 for (i = 0; i < loop->num_nodes; i++)
1309 basic_block bb = ifc_bbs[i];
1310 gphi_iterator itr;
1312 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1313 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1314 *any_mask_load_store))
1315 return false;
1318 if (dump_file)
1319 fprintf (dump_file, "Applying if-conversion\n");
1321 return true;
1324 /* Return true when LOOP is if-convertible.
1325 LOOP is if-convertible if:
1326 - it is innermost,
1327 - it has two or more basic blocks,
1328 - it has only one exit,
1329 - loop header is not the exit edge,
1330 - if its basic blocks and phi nodes are if convertible. */
1332 static bool
1333 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1335 edge e;
1336 edge_iterator ei;
1337 bool res = false;
1338 vec<data_reference_p> refs;
1339 vec<ddr_p> ddrs;
1341 /* Handle only innermost loop. */
1342 if (!loop || loop->inner)
1344 if (dump_file && (dump_flags & TDF_DETAILS))
1345 fprintf (dump_file, "not innermost loop\n");
1346 return false;
1349 /* If only one block, no need for if-conversion. */
1350 if (loop->num_nodes <= 2)
1352 if (dump_file && (dump_flags & TDF_DETAILS))
1353 fprintf (dump_file, "less than 2 basic blocks\n");
1354 return false;
1357 /* More than one loop exit is too much to handle. */
1358 if (!single_exit (loop))
1360 if (dump_file && (dump_flags & TDF_DETAILS))
1361 fprintf (dump_file, "multiple exits\n");
1362 return false;
1365 /* If one of the loop header's edge is an exit edge then do not
1366 apply if-conversion. */
1367 FOR_EACH_EDGE (e, ei, loop->header->succs)
1368 if (loop_exit_edge_p (loop, e))
1369 return false;
1371 refs.create (5);
1372 ddrs.create (25);
1373 auto_vec<loop_p, 3> loop_nest;
1374 res = if_convertible_loop_p_1 (loop, &loop_nest, &refs, &ddrs,
1375 any_mask_load_store);
1377 data_reference_p dr;
1378 unsigned int i;
1379 for (i = 0; refs.iterate (i, &dr); i++)
1380 free (dr->aux);
1382 free_data_refs (refs);
1383 free_dependence_relations (ddrs);
1385 delete ref_DR_map;
1386 ref_DR_map = NULL;
1388 delete baseref_DR_map;
1389 baseref_DR_map = NULL;
1391 return res;
1394 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1395 which is in predicated basic block.
1396 In fact, the following PHI pattern is searching:
1397 loop-header:
1398 reduc_1 = PHI <..., reduc_2>
1400 if (...)
1401 reduc_3 = ...
1402 reduc_2 = PHI <reduc_1, reduc_3>
1404 ARG_0 and ARG_1 are correspondent PHI arguments.
1405 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1406 EXTENDED is true if PHI has > 2 arguments. */
1408 static bool
1409 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1410 tree *op0, tree *op1, bool extended)
1412 tree lhs, r_op1, r_op2;
1413 gimple *stmt;
1414 gimple *header_phi = NULL;
1415 enum tree_code reduction_op;
1416 basic_block bb = gimple_bb (phi);
1417 struct loop *loop = bb->loop_father;
1418 edge latch_e = loop_latch_edge (loop);
1419 imm_use_iterator imm_iter;
1420 use_operand_p use_p;
1421 edge e;
1422 edge_iterator ei;
1423 bool result = false;
1424 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1425 return false;
1427 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1429 lhs = arg_1;
1430 header_phi = SSA_NAME_DEF_STMT (arg_0);
1431 stmt = SSA_NAME_DEF_STMT (arg_1);
1433 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1435 lhs = arg_0;
1436 header_phi = SSA_NAME_DEF_STMT (arg_1);
1437 stmt = SSA_NAME_DEF_STMT (arg_0);
1439 else
1440 return false;
1441 if (gimple_bb (header_phi) != loop->header)
1442 return false;
1444 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1445 return false;
1447 if (gimple_code (stmt) != GIMPLE_ASSIGN
1448 || gimple_has_volatile_ops (stmt))
1449 return false;
1451 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1452 return false;
1454 if (!is_predicated (gimple_bb (stmt)))
1455 return false;
1457 /* Check that stmt-block is predecessor of phi-block. */
1458 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1459 if (e->dest == bb)
1461 result = true;
1462 break;
1464 if (!result)
1465 return false;
1467 if (!has_single_use (lhs))
1468 return false;
1470 reduction_op = gimple_assign_rhs_code (stmt);
1471 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1472 return false;
1473 r_op1 = gimple_assign_rhs1 (stmt);
1474 r_op2 = gimple_assign_rhs2 (stmt);
1476 /* Make R_OP1 to hold reduction variable. */
1477 if (r_op2 == PHI_RESULT (header_phi)
1478 && reduction_op == PLUS_EXPR)
1479 std::swap (r_op1, r_op2);
1480 else if (r_op1 != PHI_RESULT (header_phi))
1481 return false;
1483 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1484 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1486 gimple *use_stmt = USE_STMT (use_p);
1487 if (is_gimple_debug (use_stmt))
1488 continue;
1489 if (use_stmt == stmt)
1490 continue;
1491 if (gimple_code (use_stmt) != GIMPLE_PHI)
1492 return false;
1495 *op0 = r_op1; *op1 = r_op2;
1496 *reduc = stmt;
1497 return true;
1500 /* Converts conditional scalar reduction into unconditional form, e.g.
1501 bb_4
1502 if (_5 != 0) goto bb_5 else goto bb_6
1503 end_bb_4
1504 bb_5
1505 res_6 = res_13 + 1;
1506 end_bb_5
1507 bb_6
1508 # res_2 = PHI <res_13(4), res_6(5)>
1509 end_bb_6
1511 will be converted into sequence
1512 _ifc__1 = _5 != 0 ? 1 : 0;
1513 res_2 = res_13 + _ifc__1;
1514 Argument SWAP tells that arguments of conditional expression should be
1515 swapped.
1516 Returns rhs of resulting PHI assignment. */
1518 static tree
1519 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1520 tree cond, tree op0, tree op1, bool swap)
1522 gimple_stmt_iterator stmt_it;
1523 gimple *new_assign;
1524 tree rhs;
1525 tree rhs1 = gimple_assign_rhs1 (reduc);
1526 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1527 tree c;
1528 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1530 if (dump_file && (dump_flags & TDF_DETAILS))
1532 fprintf (dump_file, "Found cond scalar reduction.\n");
1533 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1536 /* Build cond expression using COND and constant operand
1537 of reduction rhs. */
1538 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1539 unshare_expr (cond),
1540 swap ? zero : op1,
1541 swap ? op1 : zero);
1543 /* Create assignment stmt and insert it at GSI. */
1544 new_assign = gimple_build_assign (tmp, c);
1545 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1546 /* Build rhs for unconditional increment/decrement. */
1547 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1548 TREE_TYPE (rhs1), op0, tmp);
1550 /* Delete original reduction stmt. */
1551 stmt_it = gsi_for_stmt (reduc);
1552 gsi_remove (&stmt_it, true);
1553 release_defs (reduc);
1554 return rhs;
1557 /* Produce condition for all occurrences of ARG in PHI node. */
1559 static tree
1560 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1561 gimple_stmt_iterator *gsi)
1563 int len;
1564 int i;
1565 tree cond = NULL_TREE;
1566 tree c;
1567 edge e;
1569 len = occur->length ();
1570 gcc_assert (len > 0);
1571 for (i = 0; i < len; i++)
1573 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1574 c = bb_predicate (e->src);
1575 if (is_true_predicate (c))
1576 continue;
1577 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1578 is_gimple_condexpr, NULL_TREE,
1579 true, GSI_SAME_STMT);
1580 if (cond != NULL_TREE)
1582 /* Must build OR expression. */
1583 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1584 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1585 is_gimple_condexpr, NULL_TREE,
1586 true, GSI_SAME_STMT);
1588 else
1589 cond = c;
1591 gcc_assert (cond != NULL_TREE);
1592 return cond;
1595 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1596 This routine can handle PHI nodes with more than two arguments.
1598 For example,
1599 S1: A = PHI <x1(1), x2(5)>
1600 is converted into,
1601 S2: A = cond ? x1 : x2;
1603 The generated code is inserted at GSI that points to the top of
1604 basic block's statement list.
1605 If PHI node has more than two arguments a chain of conditional
1606 expression is produced. */
1609 static void
1610 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1612 gimple *new_stmt = NULL, *reduc;
1613 tree rhs, res, arg0, arg1, op0, op1, scev;
1614 tree cond;
1615 unsigned int index0;
1616 unsigned int max, args_len;
1617 edge e;
1618 basic_block bb;
1619 unsigned int i;
1621 res = gimple_phi_result (phi);
1622 if (virtual_operand_p (res))
1623 return;
1625 if ((rhs = degenerate_phi_result (phi))
1626 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1627 res))
1628 && !chrec_contains_undetermined (scev)
1629 && scev != res
1630 && (rhs = gimple_phi_arg_def (phi, 0))))
1632 if (dump_file && (dump_flags & TDF_DETAILS))
1634 fprintf (dump_file, "Degenerate phi!\n");
1635 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1637 new_stmt = gimple_build_assign (res, rhs);
1638 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1639 update_stmt (new_stmt);
1640 return;
1643 bb = gimple_bb (phi);
1644 if (EDGE_COUNT (bb->preds) == 2)
1646 /* Predicate ordinary PHI node with 2 arguments. */
1647 edge first_edge, second_edge;
1648 basic_block true_bb;
1649 first_edge = EDGE_PRED (bb, 0);
1650 second_edge = EDGE_PRED (bb, 1);
1651 cond = bb_predicate (first_edge->src);
1652 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1653 std::swap (first_edge, second_edge);
1654 if (EDGE_COUNT (first_edge->src->succs) > 1)
1656 cond = bb_predicate (second_edge->src);
1657 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1658 cond = TREE_OPERAND (cond, 0);
1659 else
1660 first_edge = second_edge;
1662 else
1663 cond = bb_predicate (first_edge->src);
1664 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1665 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1666 is_gimple_condexpr, NULL_TREE,
1667 true, GSI_SAME_STMT);
1668 true_bb = first_edge->src;
1669 if (EDGE_PRED (bb, 1)->src == true_bb)
1671 arg0 = gimple_phi_arg_def (phi, 1);
1672 arg1 = gimple_phi_arg_def (phi, 0);
1674 else
1676 arg0 = gimple_phi_arg_def (phi, 0);
1677 arg1 = gimple_phi_arg_def (phi, 1);
1679 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1680 &op0, &op1, false))
1681 /* Convert reduction stmt into vectorizable form. */
1682 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1683 true_bb != gimple_bb (reduc));
1684 else
1685 /* Build new RHS using selected condition and arguments. */
1686 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1687 arg0, arg1);
1688 new_stmt = gimple_build_assign (res, rhs);
1689 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1690 update_stmt (new_stmt);
1692 if (dump_file && (dump_flags & TDF_DETAILS))
1694 fprintf (dump_file, "new phi replacement stmt\n");
1695 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1697 return;
1700 /* Create hashmap for PHI node which contain vector of argument indexes
1701 having the same value. */
1702 bool swap = false;
1703 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1704 unsigned int num_args = gimple_phi_num_args (phi);
1705 int max_ind = -1;
1706 /* Vector of different PHI argument values. */
1707 auto_vec<tree> args (num_args);
1709 /* Compute phi_arg_map. */
1710 for (i = 0; i < num_args; i++)
1712 tree arg;
1714 arg = gimple_phi_arg_def (phi, i);
1715 if (!phi_arg_map.get (arg))
1716 args.quick_push (arg);
1717 phi_arg_map.get_or_insert (arg).safe_push (i);
1720 /* Determine element with max number of occurrences. */
1721 max_ind = -1;
1722 max = 1;
1723 args_len = args.length ();
1724 for (i = 0; i < args_len; i++)
1726 unsigned int len;
1727 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1729 max_ind = (int) i;
1730 max = len;
1734 /* Put element with max number of occurences to the end of ARGS. */
1735 if (max_ind != -1 && max_ind +1 != (int) args_len)
1736 std::swap (args[args_len - 1], args[max_ind]);
1738 /* Handle one special case when number of arguments with different values
1739 is equal 2 and one argument has the only occurrence. Such PHI can be
1740 handled as if would have only 2 arguments. */
1741 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1743 vec<int> *indexes;
1744 indexes = phi_arg_map.get (args[0]);
1745 index0 = (*indexes)[0];
1746 arg0 = args[0];
1747 arg1 = args[1];
1748 e = gimple_phi_arg_edge (phi, index0);
1749 cond = bb_predicate (e->src);
1750 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1752 swap = true;
1753 cond = TREE_OPERAND (cond, 0);
1755 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1756 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1757 is_gimple_condexpr, NULL_TREE,
1758 true, GSI_SAME_STMT);
1759 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1760 &op0, &op1, true)))
1761 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1762 swap? arg1 : arg0,
1763 swap? arg0 : arg1);
1764 else
1765 /* Convert reduction stmt into vectorizable form. */
1766 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1767 swap);
1768 new_stmt = gimple_build_assign (res, rhs);
1769 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1770 update_stmt (new_stmt);
1772 else
1774 /* Common case. */
1775 vec<int> *indexes;
1776 tree type = TREE_TYPE (gimple_phi_result (phi));
1777 tree lhs;
1778 arg1 = args[1];
1779 for (i = 0; i < args_len; i++)
1781 arg0 = args[i];
1782 indexes = phi_arg_map.get (args[i]);
1783 if (i != args_len - 1)
1784 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1785 else
1786 lhs = res;
1787 cond = gen_phi_arg_condition (phi, indexes, gsi);
1788 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1789 arg0, arg1);
1790 new_stmt = gimple_build_assign (lhs, rhs);
1791 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1792 update_stmt (new_stmt);
1793 arg1 = lhs;
1797 if (dump_file && (dump_flags & TDF_DETAILS))
1799 fprintf (dump_file, "new extended phi replacement stmt\n");
1800 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1804 /* Replaces in LOOP all the scalar phi nodes other than those in the
1805 LOOP->header block with conditional modify expressions. */
1807 static void
1808 predicate_all_scalar_phis (struct loop *loop)
1810 basic_block bb;
1811 unsigned int orig_loop_num_nodes = loop->num_nodes;
1812 unsigned int i;
1814 for (i = 1; i < orig_loop_num_nodes; i++)
1816 gphi *phi;
1817 gimple_stmt_iterator gsi;
1818 gphi_iterator phi_gsi;
1819 bb = ifc_bbs[i];
1821 if (bb == loop->header)
1822 continue;
1824 if (EDGE_COUNT (bb->preds) == 1)
1825 continue;
1827 phi_gsi = gsi_start_phis (bb);
1828 if (gsi_end_p (phi_gsi))
1829 continue;
1831 gsi = gsi_after_labels (bb);
1832 while (!gsi_end_p (phi_gsi))
1834 phi = phi_gsi.phi ();
1835 predicate_scalar_phi (phi, &gsi);
1836 release_phi_node (phi);
1837 gsi_next (&phi_gsi);
1840 set_phi_nodes (bb, NULL);
1844 /* Insert in each basic block of LOOP the statements produced by the
1845 gimplification of the predicates. */
1847 static void
1848 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1850 unsigned int i;
1852 for (i = 0; i < loop->num_nodes; i++)
1854 basic_block bb = ifc_bbs[i];
1855 gimple_seq stmts;
1856 if (!is_predicated (bb))
1857 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
1858 if (!is_predicated (bb))
1860 /* Do not insert statements for a basic block that is not
1861 predicated. Also make sure that the predicate of the
1862 basic block is set to true. */
1863 reset_bb_predicate (bb);
1864 continue;
1867 stmts = bb_predicate_gimplified_stmts (bb);
1868 if (stmts)
1870 if (flag_tree_loop_if_convert_stores
1871 || any_mask_load_store)
1873 /* Insert the predicate of the BB just after the label,
1874 as the if-conversion of memory writes will use this
1875 predicate. */
1876 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1877 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1879 else
1881 /* Insert the predicate of the BB at the end of the BB
1882 as this would reduce the register pressure: the only
1883 use of this predicate will be in successor BBs. */
1884 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1886 if (gsi_end_p (gsi)
1887 || stmt_ends_bb_p (gsi_stmt (gsi)))
1888 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1889 else
1890 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1893 /* Once the sequence is code generated, set it to NULL. */
1894 set_bb_predicate_gimplified_stmts (bb, NULL);
1899 /* Helper function for predicate_mem_writes. Returns index of existent
1900 mask if it was created for given SIZE and -1 otherwise. */
1902 static int
1903 mask_exists (int size, vec<int> vec)
1905 unsigned int ix;
1906 int v;
1907 FOR_EACH_VEC_ELT (vec, ix, v)
1908 if (v == size)
1909 return (int) ix;
1910 return -1;
1913 /* Predicate each write to memory in LOOP.
1915 This function transforms control flow constructs containing memory
1916 writes of the form:
1918 | for (i = 0; i < N; i++)
1919 | if (cond)
1920 | A[i] = expr;
1922 into the following form that does not contain control flow:
1924 | for (i = 0; i < N; i++)
1925 | A[i] = cond ? expr : A[i];
1927 The original CFG looks like this:
1929 | bb_0
1930 | i = 0
1931 | end_bb_0
1933 | bb_1
1934 | if (i < N) goto bb_5 else goto bb_2
1935 | end_bb_1
1937 | bb_2
1938 | cond = some_computation;
1939 | if (cond) goto bb_3 else goto bb_4
1940 | end_bb_2
1942 | bb_3
1943 | A[i] = expr;
1944 | goto bb_4
1945 | end_bb_3
1947 | bb_4
1948 | goto bb_1
1949 | end_bb_4
1951 insert_gimplified_predicates inserts the computation of the COND
1952 expression at the beginning of the destination basic block:
1954 | bb_0
1955 | i = 0
1956 | end_bb_0
1958 | bb_1
1959 | if (i < N) goto bb_5 else goto bb_2
1960 | end_bb_1
1962 | bb_2
1963 | cond = some_computation;
1964 | if (cond) goto bb_3 else goto bb_4
1965 | end_bb_2
1967 | bb_3
1968 | cond = some_computation;
1969 | A[i] = expr;
1970 | goto bb_4
1971 | end_bb_3
1973 | bb_4
1974 | goto bb_1
1975 | end_bb_4
1977 predicate_mem_writes is then predicating the memory write as follows:
1979 | bb_0
1980 | i = 0
1981 | end_bb_0
1983 | bb_1
1984 | if (i < N) goto bb_5 else goto bb_2
1985 | end_bb_1
1987 | bb_2
1988 | if (cond) goto bb_3 else goto bb_4
1989 | end_bb_2
1991 | bb_3
1992 | cond = some_computation;
1993 | A[i] = cond ? expr : A[i];
1994 | goto bb_4
1995 | end_bb_3
1997 | bb_4
1998 | goto bb_1
1999 | end_bb_4
2001 and finally combine_blocks removes the basic block boundaries making
2002 the loop vectorizable:
2004 | bb_0
2005 | i = 0
2006 | if (i < N) goto bb_5 else goto bb_1
2007 | end_bb_0
2009 | bb_1
2010 | cond = some_computation;
2011 | A[i] = cond ? expr : A[i];
2012 | if (i < N) goto bb_5 else goto bb_4
2013 | end_bb_1
2015 | bb_4
2016 | goto bb_1
2017 | end_bb_4
2020 static void
2021 predicate_mem_writes (loop_p loop)
2023 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
2024 auto_vec<int, 1> vect_sizes;
2025 auto_vec<tree, 1> vect_masks;
2027 for (i = 1; i < orig_loop_num_nodes; i++)
2029 gimple_stmt_iterator gsi;
2030 basic_block bb = ifc_bbs[i];
2031 tree cond = bb_predicate (bb);
2032 bool swap;
2033 gimple *stmt;
2034 int index;
2036 if (is_true_predicate (cond))
2037 continue;
2039 swap = false;
2040 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2042 swap = true;
2043 cond = TREE_OPERAND (cond, 0);
2046 vect_sizes.truncate (0);
2047 vect_masks.truncate (0);
2049 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2050 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2051 continue;
2052 else if (gimple_plf (stmt, GF_PLF_2))
2054 tree lhs = gimple_assign_lhs (stmt);
2055 tree rhs = gimple_assign_rhs1 (stmt);
2056 tree ref, addr, ptr, mask;
2057 gimple *new_stmt;
2058 gimple_seq stmts = NULL;
2059 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
2060 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2061 mark_addressable (ref);
2062 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2063 true, NULL_TREE, true,
2064 GSI_SAME_STMT);
2065 if (!vect_sizes.is_empty ()
2066 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2067 /* Use created mask. */
2068 mask = vect_masks[index];
2069 else
2071 if (COMPARISON_CLASS_P (cond))
2072 mask = gimple_build (&stmts, TREE_CODE (cond),
2073 boolean_type_node,
2074 TREE_OPERAND (cond, 0),
2075 TREE_OPERAND (cond, 1));
2076 else
2078 gcc_assert (TREE_CODE (cond) == SSA_NAME);
2079 mask = cond;
2082 if (swap)
2084 tree true_val
2085 = constant_boolean_node (true, TREE_TYPE (mask));
2086 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2087 TREE_TYPE (mask), mask, true_val);
2089 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2091 mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
2092 /* Save mask and its size for further use. */
2093 vect_sizes.safe_push (bitsize);
2094 vect_masks.safe_push (mask);
2096 ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
2097 /* Copy points-to info if possible. */
2098 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2099 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2100 ref);
2101 if (TREE_CODE (lhs) == SSA_NAME)
2103 new_stmt
2104 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2105 ptr, mask);
2106 gimple_call_set_lhs (new_stmt, lhs);
2108 else
2109 new_stmt
2110 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2111 mask, rhs);
2112 gsi_replace (&gsi, new_stmt, true);
2114 else if (gimple_vdef (stmt))
2116 tree lhs = gimple_assign_lhs (stmt);
2117 tree rhs = gimple_assign_rhs1 (stmt);
2118 tree type = TREE_TYPE (lhs);
2120 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2121 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2122 if (swap)
2123 std::swap (lhs, rhs);
2124 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2125 is_gimple_condexpr, NULL_TREE,
2126 true, GSI_SAME_STMT);
2127 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2128 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2129 update_stmt (stmt);
2134 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2135 other than the exit and latch of the LOOP. Also resets the
2136 GIMPLE_DEBUG information. */
2138 static void
2139 remove_conditions_and_labels (loop_p loop)
2141 gimple_stmt_iterator gsi;
2142 unsigned int i;
2144 for (i = 0; i < loop->num_nodes; i++)
2146 basic_block bb = ifc_bbs[i];
2148 if (bb_with_exit_edge_p (loop, bb)
2149 || bb == loop->latch)
2150 continue;
2152 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2153 switch (gimple_code (gsi_stmt (gsi)))
2155 case GIMPLE_COND:
2156 case GIMPLE_LABEL:
2157 gsi_remove (&gsi, true);
2158 break;
2160 case GIMPLE_DEBUG:
2161 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2162 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2164 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2165 update_stmt (gsi_stmt (gsi));
2167 gsi_next (&gsi);
2168 break;
2170 default:
2171 gsi_next (&gsi);
2176 /* Combine all the basic blocks from LOOP into one or two super basic
2177 blocks. Replace PHI nodes with conditional modify expressions. */
2179 static void
2180 combine_blocks (struct loop *loop, bool any_mask_load_store)
2182 basic_block bb, exit_bb, merge_target_bb;
2183 unsigned int orig_loop_num_nodes = loop->num_nodes;
2184 unsigned int i;
2185 edge e;
2186 edge_iterator ei;
2188 predicate_bbs (loop);
2189 remove_conditions_and_labels (loop);
2190 insert_gimplified_predicates (loop, any_mask_load_store);
2191 predicate_all_scalar_phis (loop);
2193 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2194 predicate_mem_writes (loop);
2196 /* Merge basic blocks: first remove all the edges in the loop,
2197 except for those from the exit block. */
2198 exit_bb = NULL;
2199 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2200 for (i = 0; i < orig_loop_num_nodes; i++)
2202 bb = ifc_bbs[i];
2203 predicated[i] = !is_true_predicate (bb_predicate (bb));
2204 free_bb_predicate (bb);
2205 if (bb_with_exit_edge_p (loop, bb))
2207 gcc_assert (exit_bb == NULL);
2208 exit_bb = bb;
2211 gcc_assert (exit_bb != loop->latch);
2213 for (i = 1; i < orig_loop_num_nodes; i++)
2215 bb = ifc_bbs[i];
2217 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2219 if (e->src == exit_bb)
2220 ei_next (&ei);
2221 else
2222 remove_edge (e);
2226 if (exit_bb != NULL)
2228 if (exit_bb != loop->header)
2230 /* Connect this node to loop header. */
2231 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2232 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2235 /* Redirect non-exit edges to loop->latch. */
2236 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2238 if (!loop_exit_edge_p (loop, e))
2239 redirect_edge_and_branch (e, loop->latch);
2241 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2243 else
2245 /* If the loop does not have an exit, reconnect header and latch. */
2246 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2247 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2250 merge_target_bb = loop->header;
2251 for (i = 1; i < orig_loop_num_nodes; i++)
2253 gimple_stmt_iterator gsi;
2254 gimple_stmt_iterator last;
2256 bb = ifc_bbs[i];
2258 if (bb == exit_bb || bb == loop->latch)
2259 continue;
2261 /* Make stmts member of loop->header and clear range info from all stmts
2262 in BB which is now no longer executed conditional on a predicate we
2263 could have derived it from. */
2264 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2266 gimple *stmt = gsi_stmt (gsi);
2267 gimple_set_bb (stmt, merge_target_bb);
2268 if (predicated[i])
2270 ssa_op_iter i;
2271 tree op;
2272 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2273 reset_flow_sensitive_info (op);
2277 /* Update stmt list. */
2278 last = gsi_last_bb (merge_target_bb);
2279 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2280 set_bb_seq (bb, NULL);
2282 delete_basic_block (bb);
2285 /* If possible, merge loop header to the block with the exit edge.
2286 This reduces the number of basic blocks to two, to please the
2287 vectorizer that handles only loops with two nodes. */
2288 if (exit_bb
2289 && exit_bb != loop->header
2290 && can_merge_blocks_p (loop->header, exit_bb))
2291 merge_blocks (loop->header, exit_bb);
2293 free (ifc_bbs);
2294 ifc_bbs = NULL;
2295 free (predicated);
2298 /* Version LOOP before if-converting it; the original loop
2299 will be if-converted, the new copy of the loop will not,
2300 and the LOOP_VECTORIZED internal call will be guarding which
2301 loop to execute. The vectorizer pass will fold this
2302 internal call into either true or false. */
2304 static bool
2305 version_loop_for_if_conversion (struct loop *loop)
2307 basic_block cond_bb;
2308 tree cond = make_ssa_name (boolean_type_node);
2309 struct loop *new_loop;
2310 gimple *g;
2311 gimple_stmt_iterator gsi;
2313 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2314 build_int_cst (integer_type_node, loop->num),
2315 integer_zero_node);
2316 gimple_call_set_lhs (g, cond);
2318 initialize_original_copy_tables ();
2319 new_loop = loop_version (loop, cond, &cond_bb,
2320 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2321 REG_BR_PROB_BASE, true);
2322 free_original_copy_tables ();
2323 if (new_loop == NULL)
2324 return false;
2325 new_loop->dont_vectorize = true;
2326 new_loop->force_vectorize = false;
2327 gsi = gsi_last_bb (cond_bb);
2328 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2329 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2330 update_ssa (TODO_update_ssa);
2331 return true;
2334 /* Performs splitting of critical edges if aggressive_if_conv is true.
2335 Returns false if loop won't be if converted and true otherwise. */
2337 static bool
2338 ifcvt_split_critical_edges (struct loop *loop)
2340 basic_block *body;
2341 basic_block bb;
2342 unsigned int num = loop->num_nodes;
2343 unsigned int i;
2344 gimple *stmt;
2345 edge e;
2346 edge_iterator ei;
2348 if (num <= 2)
2349 return false;
2350 if (loop->inner)
2351 return false;
2352 if (!single_exit (loop))
2353 return false;
2355 body = get_loop_body (loop);
2356 for (i = 0; i < num; i++)
2358 bb = body[i];
2359 if (bb == loop->latch
2360 || bb_with_exit_edge_p (loop, bb))
2361 continue;
2362 stmt = last_stmt (bb);
2363 /* Skip basic blocks not ending with conditional branch. */
2364 if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
2365 continue;
2366 FOR_EACH_EDGE (e, ei, bb->succs)
2367 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2368 split_edge (e);
2370 free (body);
2371 return true;
2374 /* Assumes that lhs of DEF_STMT have multiple uses.
2375 Delete one use by (1) creation of copy DEF_STMT with
2376 unique lhs; (2) change original use of lhs in one
2377 use statement with newly created lhs. */
2379 static void
2380 ifcvt_split_def_stmt (gimple *def_stmt, gimple *use_stmt)
2382 tree var;
2383 tree lhs;
2384 gimple *copy_stmt;
2385 gimple_stmt_iterator gsi;
2386 use_operand_p use_p;
2387 imm_use_iterator imm_iter;
2389 var = gimple_assign_lhs (def_stmt);
2390 copy_stmt = gimple_copy (def_stmt);
2391 lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
2392 gimple_assign_set_lhs (copy_stmt, lhs);
2393 SSA_NAME_DEF_STMT (lhs) = copy_stmt;
2394 /* Insert copy of DEF_STMT. */
2395 gsi = gsi_for_stmt (def_stmt);
2396 gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
2397 /* Change use of var to lhs in use_stmt. */
2398 if (dump_file && (dump_flags & TDF_DETAILS))
2400 fprintf (dump_file, "Change use of var ");
2401 print_generic_expr (dump_file, var, TDF_SLIM);
2402 fprintf (dump_file, " to ");
2403 print_generic_expr (dump_file, lhs, TDF_SLIM);
2404 fprintf (dump_file, "\n");
2406 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2408 if (USE_STMT (use_p) != use_stmt)
2409 continue;
2410 SET_USE (use_p, lhs);
2411 break;
2415 /* Traverse bool pattern recursively starting from VAR.
2416 Save its def and use statements to defuse_list if VAR does
2417 not have single use. */
2419 static void
2420 ifcvt_walk_pattern_tree (tree var, vec<gimple *> *defuse_list,
2421 gimple *use_stmt)
2423 tree rhs1, rhs2;
2424 enum tree_code code;
2425 gimple *def_stmt;
2427 def_stmt = SSA_NAME_DEF_STMT (var);
2428 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2429 return;
2430 if (!has_single_use (var))
2432 /* Put def and use stmts into defuse_list. */
2433 defuse_list->safe_push (def_stmt);
2434 defuse_list->safe_push (use_stmt);
2435 if (dump_file && (dump_flags & TDF_DETAILS))
2437 fprintf (dump_file, "Multiple lhs uses in stmt\n");
2438 print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
2441 rhs1 = gimple_assign_rhs1 (def_stmt);
2442 code = gimple_assign_rhs_code (def_stmt);
2443 switch (code)
2445 case SSA_NAME:
2446 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2447 break;
2448 CASE_CONVERT:
2449 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2450 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2451 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2452 break;
2453 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2454 break;
2455 case BIT_NOT_EXPR:
2456 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2457 break;
2458 case BIT_AND_EXPR:
2459 case BIT_IOR_EXPR:
2460 case BIT_XOR_EXPR:
2461 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2462 rhs2 = gimple_assign_rhs2 (def_stmt);
2463 ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
2464 break;
2465 default:
2466 break;
2468 return;
2471 /* Returns true if STMT can be a root of bool pattern applied
2472 by vectorizer. */
2474 static bool
2475 stmt_is_root_of_bool_pattern (gimple *stmt)
2477 enum tree_code code;
2478 tree lhs, rhs;
2480 code = gimple_assign_rhs_code (stmt);
2481 if (CONVERT_EXPR_CODE_P (code))
2483 lhs = gimple_assign_lhs (stmt);
2484 rhs = gimple_assign_rhs1 (stmt);
2485 if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2486 return false;
2487 if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
2488 return false;
2489 return true;
2491 else if (code == COND_EXPR)
2493 rhs = gimple_assign_rhs1 (stmt);
2494 if (TREE_CODE (rhs) != SSA_NAME)
2495 return false;
2496 return true;
2498 return false;
2501 /* Traverse all statements in BB which correspond to loop header to
2502 find out all statements which can start bool pattern applied by
2503 vectorizer and convert multiple uses in it to conform pattern
2504 restrictions. Such case can occur if the same predicate is used both
2505 for phi node conversion and load/store mask. */
2507 static void
2508 ifcvt_repair_bool_pattern (basic_block bb)
2510 tree rhs;
2511 gimple *stmt;
2512 gimple_stmt_iterator gsi;
2513 vec<gimple *> defuse_list = vNULL;
2514 vec<gimple *> pattern_roots = vNULL;
2515 bool repeat = true;
2516 int niter = 0;
2517 unsigned int ix;
2519 /* Collect all root pattern statements. */
2520 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2522 stmt = gsi_stmt (gsi);
2523 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2524 continue;
2525 if (!stmt_is_root_of_bool_pattern (stmt))
2526 continue;
2527 pattern_roots.safe_push (stmt);
2530 if (pattern_roots.is_empty ())
2531 return;
2533 /* Split all statements with multiple uses iteratively since splitting
2534 may create new multiple uses. */
2535 while (repeat)
2537 repeat = false;
2538 niter++;
2539 FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
2541 rhs = gimple_assign_rhs1 (stmt);
2542 ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
2543 while (defuse_list.length () > 0)
2545 repeat = true;
2546 gimple *def_stmt, *use_stmt;
2547 use_stmt = defuse_list.pop ();
2548 def_stmt = defuse_list.pop ();
2549 ifcvt_split_def_stmt (def_stmt, use_stmt);
2554 if (dump_file && (dump_flags & TDF_DETAILS))
2555 fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
2556 niter);
2559 /* Delete redundant statements produced by predication which prevents
2560 loop vectorization. */
2562 static void
2563 ifcvt_local_dce (basic_block bb)
2565 gimple *stmt;
2566 gimple *stmt1;
2567 gimple *phi;
2568 gimple_stmt_iterator gsi;
2569 vec<gimple *> worklist;
2570 enum gimple_code code;
2571 use_operand_p use_p;
2572 imm_use_iterator imm_iter;
2574 worklist.create (64);
2575 /* Consider all phi as live statements. */
2576 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2578 phi = gsi_stmt (gsi);
2579 gimple_set_plf (phi, GF_PLF_2, true);
2580 worklist.safe_push (phi);
2582 /* Consider load/store statements, CALL and COND as live. */
2583 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2585 stmt = gsi_stmt (gsi);
2586 if (gimple_store_p (stmt)
2587 || gimple_assign_load_p (stmt)
2588 || is_gimple_debug (stmt))
2590 gimple_set_plf (stmt, GF_PLF_2, true);
2591 worklist.safe_push (stmt);
2592 continue;
2594 code = gimple_code (stmt);
2595 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2597 gimple_set_plf (stmt, GF_PLF_2, true);
2598 worklist.safe_push (stmt);
2599 continue;
2601 gimple_set_plf (stmt, GF_PLF_2, false);
2603 if (code == GIMPLE_ASSIGN)
2605 tree lhs = gimple_assign_lhs (stmt);
2606 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2608 stmt1 = USE_STMT (use_p);
2609 if (gimple_bb (stmt1) != bb)
2611 gimple_set_plf (stmt, GF_PLF_2, true);
2612 worklist.safe_push (stmt);
2613 break;
2618 /* Propagate liveness through arguments of live stmt. */
2619 while (worklist.length () > 0)
2621 ssa_op_iter iter;
2622 use_operand_p use_p;
2623 tree use;
2625 stmt = worklist.pop ();
2626 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2628 use = USE_FROM_PTR (use_p);
2629 if (TREE_CODE (use) != SSA_NAME)
2630 continue;
2631 stmt1 = SSA_NAME_DEF_STMT (use);
2632 if (gimple_bb (stmt1) != bb
2633 || gimple_plf (stmt1, GF_PLF_2))
2634 continue;
2635 gimple_set_plf (stmt1, GF_PLF_2, true);
2636 worklist.safe_push (stmt1);
2639 /* Delete dead statements. */
2640 gsi = gsi_start_bb (bb);
2641 while (!gsi_end_p (gsi))
2643 stmt = gsi_stmt (gsi);
2644 if (gimple_plf (stmt, GF_PLF_2))
2646 gsi_next (&gsi);
2647 continue;
2649 if (dump_file && (dump_flags & TDF_DETAILS))
2651 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2652 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2654 gsi_remove (&gsi, true);
2655 release_defs (stmt);
2659 /* If-convert LOOP when it is legal. For the moment this pass has no
2660 profitability analysis. Returns non-zero todo flags when something
2661 changed. */
2663 static unsigned int
2664 tree_if_conversion (struct loop *loop)
2666 unsigned int todo = 0;
2667 ifc_bbs = NULL;
2668 bool any_mask_load_store = false;
2670 /* Set up aggressive if-conversion for loops marked with simd pragma. */
2671 aggressive_if_conv = loop->force_vectorize;
2672 /* Check either outer loop was marked with simd pragma. */
2673 if (!aggressive_if_conv)
2675 struct loop *outer_loop = loop_outer (loop);
2676 if (outer_loop && outer_loop->force_vectorize)
2677 aggressive_if_conv = true;
2680 if (aggressive_if_conv)
2681 if (!ifcvt_split_critical_edges (loop))
2682 goto cleanup;
2684 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2685 || !dbg_cnt (if_conversion_tree))
2686 goto cleanup;
2688 if (any_mask_load_store
2689 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2690 || loop->dont_vectorize))
2691 goto cleanup;
2693 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2694 goto cleanup;
2696 /* Now all statements are if-convertible. Combine all the basic
2697 blocks into one huge basic block doing the if-conversion
2698 on-the-fly. */
2699 combine_blocks (loop, any_mask_load_store);
2701 /* Delete dead predicate computations and repair tree correspondent
2702 to bool pattern to delete multiple uses of predicates. */
2703 if (aggressive_if_conv)
2705 ifcvt_local_dce (loop->header);
2706 ifcvt_repair_bool_pattern (loop->header);
2709 todo |= TODO_cleanup_cfg;
2710 if (flag_tree_loop_if_convert_stores || any_mask_load_store)
2712 mark_virtual_operands_for_renaming (cfun);
2713 todo |= TODO_update_ssa_only_virtuals;
2716 cleanup:
2717 if (ifc_bbs)
2719 unsigned int i;
2721 for (i = 0; i < loop->num_nodes; i++)
2722 free_bb_predicate (ifc_bbs[i]);
2724 free (ifc_bbs);
2725 ifc_bbs = NULL;
2727 free_dominance_info (CDI_POST_DOMINATORS);
2729 return todo;
2732 /* Tree if-conversion pass management. */
2734 namespace {
2736 const pass_data pass_data_if_conversion =
2738 GIMPLE_PASS, /* type */
2739 "ifcvt", /* name */
2740 OPTGROUP_NONE, /* optinfo_flags */
2741 TV_NONE, /* tv_id */
2742 ( PROP_cfg | PROP_ssa ), /* properties_required */
2743 0, /* properties_provided */
2744 0, /* properties_destroyed */
2745 0, /* todo_flags_start */
2746 0, /* todo_flags_finish */
2749 class pass_if_conversion : public gimple_opt_pass
2751 public:
2752 pass_if_conversion (gcc::context *ctxt)
2753 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2756 /* opt_pass methods: */
2757 virtual bool gate (function *);
2758 virtual unsigned int execute (function *);
2760 }; // class pass_if_conversion
2762 bool
2763 pass_if_conversion::gate (function *fun)
2765 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2766 && flag_tree_loop_if_convert != 0)
2767 || flag_tree_loop_if_convert == 1
2768 || flag_tree_loop_if_convert_stores == 1);
2771 unsigned int
2772 pass_if_conversion::execute (function *fun)
2774 struct loop *loop;
2775 unsigned todo = 0;
2777 if (number_of_loops (fun) <= 1)
2778 return 0;
2780 FOR_EACH_LOOP (loop, 0)
2781 if (flag_tree_loop_if_convert == 1
2782 || flag_tree_loop_if_convert_stores == 1
2783 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2784 && !loop->dont_vectorize))
2785 todo |= tree_if_conversion (loop);
2787 if (flag_checking)
2789 basic_block bb;
2790 FOR_EACH_BB_FN (bb, fun)
2791 gcc_assert (!bb->aux);
2794 return todo;
2797 } // anon namespace
2799 gimple_opt_pass *
2800 make_pass_if_conversion (gcc::context *ctxt)
2802 return new pass_if_conversion (ctxt);