2015-12-18 Ville Voutilainen <ville.voutilainen@gmail.com>
[official-gcc.git] / gcc / tree-if-conv.c
blob635a55204c39effc733f33409d35d5917128a74c
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
25 A short description of if-conversion:
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
38 Sample transformation:
40 INPUT
41 -----
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
48 <L17>:;
49 goto <bb 3> (<L3>);
51 <L1>:;
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
59 <L19>:;
60 goto <bb 1> (<L0>);
62 <L18>:;
64 OUTPUT
65 ------
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
77 <L19>:;
78 goto <bb 1> (<L0>);
80 <L18>:;
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop-ivopts.h"
110 #include "tree-ssa-address.h"
111 #include "dbgcnt.h"
112 #include "tree-hash-traits.h"
113 #include "varasm.h"
114 #include "builtins.h"
115 #include "params.h"
117 /* List of basic blocks in if-conversion-suitable order. */
118 static basic_block *ifc_bbs;
120 /* Apply more aggressive (extended) if-conversion if true. */
121 static bool aggressive_if_conv;
123 /* Hash table to store references, DR pairs. */
124 static hash_map<tree_operand_hash, data_reference_p> *ref_DR_map;
126 /* Hash table to store base reference, DR pairs. */
127 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
129 /* Structure used to predicate basic blocks. This is attached to the
130 ->aux field of the BBs in the loop to be if-converted. */
131 struct bb_predicate {
133 /* The condition under which this basic block is executed. */
134 tree predicate;
136 /* PREDICATE is gimplified, and the sequence of statements is
137 recorded here, in order to avoid the duplication of computations
138 that occur in previous conditions. See PR44483. */
139 gimple_seq predicate_gimplified_stmts;
142 /* Returns true when the basic block BB has a predicate. */
144 static inline bool
145 bb_has_predicate (basic_block bb)
147 return bb->aux != NULL;
150 /* Returns the gimplified predicate for basic block BB. */
152 static inline tree
153 bb_predicate (basic_block bb)
155 return ((struct bb_predicate *) bb->aux)->predicate;
158 /* Sets the gimplified predicate COND for basic block BB. */
160 static inline void
161 set_bb_predicate (basic_block bb, tree cond)
163 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
164 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
165 || is_gimple_condexpr (cond));
166 ((struct bb_predicate *) bb->aux)->predicate = cond;
169 /* Returns the sequence of statements of the gimplification of the
170 predicate for basic block BB. */
172 static inline gimple_seq
173 bb_predicate_gimplified_stmts (basic_block bb)
175 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
178 /* Sets the sequence of statements STMTS of the gimplification of the
179 predicate for basic block BB. */
181 static inline void
182 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
184 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
187 /* Adds the sequence of statements STMTS to the sequence of statements
188 of the predicate for basic block BB. */
190 static inline void
191 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
193 gimple_seq_add_seq
194 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
197 /* Initializes to TRUE the predicate of basic block BB. */
199 static inline void
200 init_bb_predicate (basic_block bb)
202 bb->aux = XNEW (struct bb_predicate);
203 set_bb_predicate_gimplified_stmts (bb, NULL);
204 set_bb_predicate (bb, boolean_true_node);
207 /* Release the SSA_NAMEs associated with the predicate of basic block BB,
208 but don't actually free it. */
210 static inline void
211 release_bb_predicate (basic_block bb)
213 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
214 if (stmts)
216 gimple_stmt_iterator i;
218 for (i = gsi_start (stmts); !gsi_end_p (i); gsi_next (&i))
219 free_stmt_operands (cfun, gsi_stmt (i));
220 set_bb_predicate_gimplified_stmts (bb, NULL);
224 /* Free the predicate of basic block BB. */
226 static inline void
227 free_bb_predicate (basic_block bb)
229 if (!bb_has_predicate (bb))
230 return;
232 release_bb_predicate (bb);
233 free (bb->aux);
234 bb->aux = NULL;
237 /* Reinitialize predicate of BB with the true predicate. */
239 static inline void
240 reset_bb_predicate (basic_block bb)
242 if (!bb_has_predicate (bb))
243 init_bb_predicate (bb);
244 else
246 release_bb_predicate (bb);
247 set_bb_predicate (bb, boolean_true_node);
251 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
252 the expression EXPR. Inserts the statement created for this
253 computation before GSI and leaves the iterator GSI at the same
254 statement. */
256 static tree
257 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
259 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
260 gimple *stmt = gimple_build_assign (new_name, expr);
261 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
262 return new_name;
265 /* Return true when COND is a true predicate. */
267 static inline bool
268 is_true_predicate (tree cond)
270 return (cond == NULL_TREE
271 || cond == boolean_true_node
272 || integer_onep (cond));
275 /* Returns true when BB has a predicate that is not trivial: true or
276 NULL_TREE. */
278 static inline bool
279 is_predicated (basic_block bb)
281 return !is_true_predicate (bb_predicate (bb));
284 /* Parses the predicate COND and returns its comparison code and
285 operands OP0 and OP1. */
287 static enum tree_code
288 parse_predicate (tree cond, tree *op0, tree *op1)
290 gimple *s;
292 if (TREE_CODE (cond) == SSA_NAME
293 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
295 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
297 *op0 = gimple_assign_rhs1 (s);
298 *op1 = gimple_assign_rhs2 (s);
299 return gimple_assign_rhs_code (s);
302 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
304 tree op = gimple_assign_rhs1 (s);
305 tree type = TREE_TYPE (op);
306 enum tree_code code = parse_predicate (op, op0, op1);
308 return code == ERROR_MARK ? ERROR_MARK
309 : invert_tree_comparison (code, HONOR_NANS (type));
312 return ERROR_MARK;
315 if (COMPARISON_CLASS_P (cond))
317 *op0 = TREE_OPERAND (cond, 0);
318 *op1 = TREE_OPERAND (cond, 1);
319 return TREE_CODE (cond);
322 return ERROR_MARK;
325 /* Returns the fold of predicate C1 OR C2 at location LOC. */
327 static tree
328 fold_or_predicates (location_t loc, tree c1, tree c2)
330 tree op1a, op1b, op2a, op2b;
331 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
332 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
334 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
336 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
337 code2, op2a, op2b);
338 if (t)
339 return t;
342 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
345 /* Returns true if N is either a constant or a SSA_NAME. */
347 static bool
348 constant_or_ssa_name (tree n)
350 switch (TREE_CODE (n))
352 case SSA_NAME:
353 case INTEGER_CST:
354 case REAL_CST:
355 case COMPLEX_CST:
356 case VECTOR_CST:
357 return true;
358 default:
359 return false;
363 /* Returns either a COND_EXPR or the folded expression if the folded
364 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
365 a constant or a SSA_NAME. */
367 static tree
368 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
370 tree rhs1, lhs1, cond_expr;
372 /* If COND is comparison r != 0 and r has boolean type, convert COND
373 to SSA_NAME to accept by vect bool pattern. */
374 if (TREE_CODE (cond) == NE_EXPR)
376 tree op0 = TREE_OPERAND (cond, 0);
377 tree op1 = TREE_OPERAND (cond, 1);
378 if (TREE_CODE (op0) == SSA_NAME
379 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
380 && (integer_zerop (op1)))
381 cond = op0;
383 cond_expr = fold_ternary (COND_EXPR, type, cond,
384 rhs, lhs);
386 if (cond_expr == NULL_TREE)
387 return build3 (COND_EXPR, type, cond, rhs, lhs);
389 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
391 if (constant_or_ssa_name (cond_expr))
392 return cond_expr;
394 if (TREE_CODE (cond_expr) == ABS_EXPR)
396 rhs1 = TREE_OPERAND (cond_expr, 1);
397 STRIP_USELESS_TYPE_CONVERSION (rhs1);
398 if (constant_or_ssa_name (rhs1))
399 return build1 (ABS_EXPR, type, rhs1);
402 if (TREE_CODE (cond_expr) == MIN_EXPR
403 || TREE_CODE (cond_expr) == MAX_EXPR)
405 lhs1 = TREE_OPERAND (cond_expr, 0);
406 STRIP_USELESS_TYPE_CONVERSION (lhs1);
407 rhs1 = TREE_OPERAND (cond_expr, 1);
408 STRIP_USELESS_TYPE_CONVERSION (rhs1);
409 if (constant_or_ssa_name (rhs1)
410 && constant_or_ssa_name (lhs1))
411 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
413 return build3 (COND_EXPR, type, cond, rhs, lhs);
416 /* Add condition NC to the predicate list of basic block BB. LOOP is
417 the loop to be if-converted. Use predicate of cd-equivalent block
418 for join bb if it exists: we call basic blocks bb1 and bb2
419 cd-equivalent if they are executed under the same condition. */
421 static inline void
422 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
424 tree bc, *tp;
425 basic_block dom_bb;
427 if (is_true_predicate (nc))
428 return;
430 /* If dominance tells us this basic block is always executed,
431 don't record any predicates for it. */
432 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
433 return;
435 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
436 /* We use notion of cd equivalence to get simpler predicate for
437 join block, e.g. if join block has 2 predecessors with predicates
438 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
439 p1 & p2 | p1 & !p2. */
440 if (dom_bb != loop->header
441 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
443 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
444 bc = bb_predicate (dom_bb);
445 if (!is_true_predicate (bc))
446 set_bb_predicate (bb, bc);
447 else
448 gcc_assert (is_true_predicate (bb_predicate (bb)));
449 if (dump_file && (dump_flags & TDF_DETAILS))
450 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
451 dom_bb->index, bb->index);
452 return;
455 if (!is_predicated (bb))
456 bc = nc;
457 else
459 bc = bb_predicate (bb);
460 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
461 if (is_true_predicate (bc))
463 reset_bb_predicate (bb);
464 return;
468 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
469 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
470 tp = &TREE_OPERAND (bc, 0);
471 else
472 tp = &bc;
473 if (!is_gimple_condexpr (*tp))
475 gimple_seq stmts;
476 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
477 add_bb_predicate_gimplified_stmts (bb, stmts);
479 set_bb_predicate (bb, bc);
482 /* Add the condition COND to the previous condition PREV_COND, and add
483 this to the predicate list of the destination of edge E. LOOP is
484 the loop to be if-converted. */
486 static void
487 add_to_dst_predicate_list (struct loop *loop, edge e,
488 tree prev_cond, tree cond)
490 if (!flow_bb_inside_loop_p (loop, e->dest))
491 return;
493 if (!is_true_predicate (prev_cond))
494 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
495 prev_cond, cond);
497 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
498 add_to_predicate_list (loop, e->dest, cond);
501 /* Return true if one of the successor edges of BB exits LOOP. */
503 static bool
504 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
506 edge e;
507 edge_iterator ei;
509 FOR_EACH_EDGE (e, ei, bb->succs)
510 if (loop_exit_edge_p (loop, e))
511 return true;
513 return false;
516 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
517 and it belongs to basic block BB.
519 PHI is not if-convertible if:
520 - it has more than 2 arguments.
522 When we didn't see if-convertible stores, PHI is not
523 if-convertible if:
524 - a virtual PHI is immediately used in another PHI node,
525 - there is a virtual PHI in a BB other than the loop->header.
526 When the aggressive_if_conv is set, PHI can have more than
527 two arguments. */
529 static bool
530 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
531 bool any_mask_load_store)
533 if (dump_file && (dump_flags & TDF_DETAILS))
535 fprintf (dump_file, "-------------------------\n");
536 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
539 if (bb != loop->header)
541 if (gimple_phi_num_args (phi) != 2
542 && !aggressive_if_conv)
544 if (dump_file && (dump_flags & TDF_DETAILS))
545 fprintf (dump_file, "More than two phi node args.\n");
546 return false;
550 if (any_mask_load_store)
551 return true;
553 /* When there were no if-convertible stores, check
554 that there are no memory writes in the branches of the loop to be
555 if-converted. */
556 if (virtual_operand_p (gimple_phi_result (phi)))
558 imm_use_iterator imm_iter;
559 use_operand_p use_p;
561 if (bb != loop->header)
563 if (dump_file && (dump_flags & TDF_DETAILS))
564 fprintf (dump_file, "Virtual phi not on loop->header.\n");
565 return false;
568 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (phi))
570 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
571 && USE_STMT (use_p) != phi)
573 if (dump_file && (dump_flags & TDF_DETAILS))
574 fprintf (dump_file, "Difficult to handle this virtual phi.\n");
575 return false;
580 return true;
583 /* Records the status of a data reference. This struct is attached to
584 each DR->aux field. */
586 struct ifc_dr {
587 bool rw_unconditionally;
588 bool w_unconditionally;
589 bool written_at_least_once;
591 tree rw_predicate;
592 tree w_predicate;
593 tree base_w_predicate;
596 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
597 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
598 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
599 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
601 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
602 HASH tables. While storing them in HASH table, it checks if the
603 reference is unconditionally read or written and stores that as a flag
604 information. For base reference it checks if it is written atlest once
605 unconditionally and stores it as flag information along with DR.
606 In other words for every data reference A in STMT there exist other
607 accesses to a data reference with the same base with predicates that
608 add up (OR-up) to the true predicate: this ensures that the data
609 reference A is touched (read or written) on every iteration of the
610 if-converted loop. */
611 static void
612 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
615 data_reference_p *master_dr, *base_master_dr;
616 tree ref = DR_REF (a);
617 tree base_ref = DR_BASE_OBJECT (a);
618 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
619 bool exist1, exist2;
621 while (TREE_CODE (ref) == COMPONENT_REF
622 || TREE_CODE (ref) == IMAGPART_EXPR
623 || TREE_CODE (ref) == REALPART_EXPR)
624 ref = TREE_OPERAND (ref, 0);
626 master_dr = &ref_DR_map->get_or_insert (ref, &exist1);
627 if (!exist1)
628 *master_dr = a;
630 if (DR_IS_WRITE (a))
632 IFC_DR (*master_dr)->w_predicate
633 = fold_or_predicates (UNKNOWN_LOCATION, ca,
634 IFC_DR (*master_dr)->w_predicate);
635 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
636 DR_W_UNCONDITIONALLY (*master_dr) = true;
638 IFC_DR (*master_dr)->rw_predicate
639 = fold_or_predicates (UNKNOWN_LOCATION, ca,
640 IFC_DR (*master_dr)->rw_predicate);
641 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
642 DR_RW_UNCONDITIONALLY (*master_dr) = true;
644 if (DR_IS_WRITE (a))
646 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
647 if (!exist2)
648 *base_master_dr = a;
649 IFC_DR (*base_master_dr)->base_w_predicate
650 = fold_or_predicates (UNKNOWN_LOCATION, ca,
651 IFC_DR (*base_master_dr)->base_w_predicate);
652 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
653 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
657 /* Return true when the memory references of STMT won't trap in the
658 if-converted code. There are two things that we have to check for:
660 - writes to memory occur to writable memory: if-conversion of
661 memory writes transforms the conditional memory writes into
662 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
663 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
664 be executed at all in the original code, it may be a readonly
665 memory. To check that A is not const-qualified, we check that
666 there exists at least an unconditional write to A in the current
667 function.
669 - reads or writes to memory are valid memory accesses for every
670 iteration. To check that the memory accesses are correctly formed
671 and that we are allowed to read and write in these locations, we
672 check that the memory accesses to be if-converted occur at every
673 iteration unconditionally.
675 Returns true for the memory reference in STMT, same memory reference
676 is read or written unconditionally atleast once and the base memory
677 reference is written unconditionally once. This is to check reference
678 will not write fault. Also retuns true if the memory reference is
679 unconditionally read once then we are conditionally writing to memory
680 which is defined as read and write and is bound to the definition
681 we are seeing. */
682 static bool
683 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
685 data_reference_p *master_dr, *base_master_dr;
686 data_reference_p a = drs[gimple_uid (stmt) - 1];
688 tree ref_base_a = DR_REF (a);
689 tree base = DR_BASE_OBJECT (a);
691 gcc_assert (DR_STMT (a) == stmt);
693 while (TREE_CODE (ref_base_a) == COMPONENT_REF
694 || TREE_CODE (ref_base_a) == IMAGPART_EXPR
695 || TREE_CODE (ref_base_a) == REALPART_EXPR)
696 ref_base_a = TREE_OPERAND (ref_base_a, 0);
698 master_dr = ref_DR_map->get (ref_base_a);
699 base_master_dr = baseref_DR_map->get (base);
701 gcc_assert (master_dr != NULL);
703 /* If a is unconditionally written to it doesn't trap. */
704 if (DR_W_UNCONDITIONALLY (*master_dr))
705 return true;
707 /* If a is unconditionally accessed then ... */
708 if (DR_RW_UNCONDITIONALLY (*master_dr))
710 /* an unconditional read won't trap. */
711 if (DR_IS_READ (a))
712 return true;
714 /* an unconditionaly write won't trap if the base is written
715 to unconditionally. */
716 if (base_master_dr
717 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
718 return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
719 else
721 /* or the base is know to be not readonly. */
722 tree base_tree = get_base_address (DR_REF (a));
723 if (DECL_P (base_tree)
724 && decl_binds_to_current_def_p (base_tree)
725 && ! TREE_READONLY (base_tree))
726 return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
729 return false;
732 /* Return true if STMT could be converted into a masked load or store
733 (conditional load or store based on a mask computed from bb predicate). */
735 static bool
736 ifcvt_can_use_mask_load_store (gimple *stmt)
738 tree lhs, ref;
739 machine_mode mode;
740 basic_block bb = gimple_bb (stmt);
741 bool is_load;
743 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
744 || bb->loop_father->dont_vectorize
745 || !gimple_assign_single_p (stmt)
746 || gimple_has_volatile_ops (stmt))
747 return false;
749 /* Check whether this is a load or store. */
750 lhs = gimple_assign_lhs (stmt);
751 if (gimple_store_p (stmt))
753 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
754 return false;
755 is_load = false;
756 ref = lhs;
758 else if (gimple_assign_load_p (stmt))
760 is_load = true;
761 ref = gimple_assign_rhs1 (stmt);
763 else
764 return false;
766 if (may_be_nonaddressable_p (ref))
767 return false;
769 /* Mask should be integer mode of the same size as the load/store
770 mode. */
771 mode = TYPE_MODE (TREE_TYPE (lhs));
772 if (int_mode_for_mode (mode) == BLKmode
773 || VECTOR_MODE_P (mode))
774 return false;
776 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
777 return true;
779 return false;
782 /* Return true when STMT is if-convertible.
784 GIMPLE_ASSIGN statement is not if-convertible if,
785 - it is not movable,
786 - it could trap,
787 - LHS is not var decl. */
789 static bool
790 if_convertible_gimple_assign_stmt_p (gimple *stmt,
791 vec<data_reference_p> refs,
792 bool *any_mask_load_store)
794 tree lhs = gimple_assign_lhs (stmt);
796 if (dump_file && (dump_flags & TDF_DETAILS))
798 fprintf (dump_file, "-------------------------\n");
799 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
802 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
803 return false;
805 /* Some of these constrains might be too conservative. */
806 if (stmt_ends_bb_p (stmt)
807 || gimple_has_volatile_ops (stmt)
808 || (TREE_CODE (lhs) == SSA_NAME
809 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
810 || gimple_has_side_effects (stmt))
812 if (dump_file && (dump_flags & TDF_DETAILS))
813 fprintf (dump_file, "stmt not suitable for ifcvt\n");
814 return false;
817 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
818 in between if_convertible_loop_p and combine_blocks
819 we can perform loop versioning. */
820 gimple_set_plf (stmt, GF_PLF_2, false);
822 if ((! gimple_vuse (stmt)
823 || gimple_could_trap_p_1 (stmt, false, false)
824 || ! ifcvt_memrefs_wont_trap (stmt, refs))
825 && gimple_could_trap_p (stmt))
827 if (ifcvt_can_use_mask_load_store (stmt))
829 gimple_set_plf (stmt, GF_PLF_2, true);
830 *any_mask_load_store = true;
831 return true;
833 if (dump_file && (dump_flags & TDF_DETAILS))
834 fprintf (dump_file, "tree could trap...\n");
835 return false;
838 /* When if-converting stores force versioning, likewise if we
839 ended up generating store data races. */
840 if (gimple_vdef (stmt))
841 *any_mask_load_store = true;
843 return true;
846 /* Return true when STMT is if-convertible.
848 A statement is if-convertible if:
849 - it is an if-convertible GIMPLE_ASSIGN,
850 - it is a GIMPLE_LABEL or a GIMPLE_COND,
851 - it is builtins call. */
853 static bool
854 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs,
855 bool *any_mask_load_store)
857 switch (gimple_code (stmt))
859 case GIMPLE_LABEL:
860 case GIMPLE_DEBUG:
861 case GIMPLE_COND:
862 return true;
864 case GIMPLE_ASSIGN:
865 return if_convertible_gimple_assign_stmt_p (stmt, refs,
866 any_mask_load_store);
868 case GIMPLE_CALL:
870 tree fndecl = gimple_call_fndecl (stmt);
871 if (fndecl)
873 int flags = gimple_call_flags (stmt);
874 if ((flags & ECF_CONST)
875 && !(flags & ECF_LOOPING_CONST_OR_PURE)
876 /* We can only vectorize some builtins at the moment,
877 so restrict if-conversion to those. */
878 && DECL_BUILT_IN (fndecl))
879 return true;
881 return false;
884 default:
885 /* Don't know what to do with 'em so don't do anything. */
886 if (dump_file && (dump_flags & TDF_DETAILS))
888 fprintf (dump_file, "don't know what to do\n");
889 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
891 return false;
892 break;
895 return true;
898 /* Assumes that BB has more than 1 predecessors.
899 Returns false if at least one successor is not on critical edge
900 and true otherwise. */
902 static inline bool
903 all_preds_critical_p (basic_block bb)
905 edge e;
906 edge_iterator ei;
908 FOR_EACH_EDGE (e, ei, bb->preds)
909 if (EDGE_COUNT (e->src->succs) == 1)
910 return false;
911 return true;
914 /* Returns true if at least one successor in on critical edge. */
915 static inline bool
916 has_pred_critical_p (basic_block bb)
918 edge e;
919 edge_iterator ei;
921 FOR_EACH_EDGE (e, ei, bb->preds)
922 if (EDGE_COUNT (e->src->succs) > 1)
923 return true;
924 return false;
927 /* Return true when BB is if-convertible. This routine does not check
928 basic block's statements and phis.
930 A basic block is not if-convertible if:
931 - it is non-empty and it is after the exit block (in BFS order),
932 - it is after the exit block but before the latch,
933 - its edges are not normal.
935 Last restriction is valid if aggressive_if_conv is false.
937 EXIT_BB is the basic block containing the exit of the LOOP. BB is
938 inside LOOP. */
940 static bool
941 if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
943 edge e;
944 edge_iterator ei;
946 if (dump_file && (dump_flags & TDF_DETAILS))
947 fprintf (dump_file, "----------[%d]-------------\n", bb->index);
949 if (EDGE_COUNT (bb->succs) > 2)
950 return false;
952 if (EDGE_COUNT (bb->preds) > 2
953 && !aggressive_if_conv)
954 return false;
956 if (exit_bb)
958 if (bb != loop->latch)
960 if (dump_file && (dump_flags & TDF_DETAILS))
961 fprintf (dump_file, "basic block after exit bb but before latch\n");
962 return false;
964 else if (!empty_block_p (bb))
966 if (dump_file && (dump_flags & TDF_DETAILS))
967 fprintf (dump_file, "non empty basic block after exit bb\n");
968 return false;
970 else if (bb == loop->latch
971 && bb != exit_bb
972 && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 fprintf (dump_file, "latch is not dominated by exit_block\n");
976 return false;
980 /* Be less adventurous and handle only normal edges. */
981 FOR_EACH_EDGE (e, ei, bb->succs)
982 if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 fprintf (dump_file, "Difficult to handle edges\n");
986 return false;
989 /* At least one incoming edge has to be non-critical as otherwise edge
990 predicates are not equal to basic-block predicates of the edge
991 source. This check is skipped if aggressive_if_conv is true. */
992 if (!aggressive_if_conv
993 && EDGE_COUNT (bb->preds) > 1
994 && bb != loop->header
995 && all_preds_critical_p (bb))
997 if (dump_file && (dump_flags & TDF_DETAILS))
998 fprintf (dump_file, "only critical predecessors\n");
999 return false;
1002 return true;
1005 /* Return true when all predecessor blocks of BB are visited. The
1006 VISITED bitmap keeps track of the visited blocks. */
1008 static bool
1009 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1011 edge e;
1012 edge_iterator ei;
1013 FOR_EACH_EDGE (e, ei, bb->preds)
1014 if (!bitmap_bit_p (*visited, e->src->index))
1015 return false;
1017 return true;
1020 /* Get body of a LOOP in suitable order for if-conversion. It is
1021 caller's responsibility to deallocate basic block list.
1022 If-conversion suitable order is, breadth first sort (BFS) order
1023 with an additional constraint: select a block only if all its
1024 predecessors are already selected. */
1026 static basic_block *
1027 get_loop_body_in_if_conv_order (const struct loop *loop)
1029 basic_block *blocks, *blocks_in_bfs_order;
1030 basic_block bb;
1031 bitmap visited;
1032 unsigned int index = 0;
1033 unsigned int visited_count = 0;
1035 gcc_assert (loop->num_nodes);
1036 gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1038 blocks = XCNEWVEC (basic_block, loop->num_nodes);
1039 visited = BITMAP_ALLOC (NULL);
1041 blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1043 index = 0;
1044 while (index < loop->num_nodes)
1046 bb = blocks_in_bfs_order [index];
1048 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1050 free (blocks_in_bfs_order);
1051 BITMAP_FREE (visited);
1052 free (blocks);
1053 return NULL;
1056 if (!bitmap_bit_p (visited, bb->index))
1058 if (pred_blocks_visited_p (bb, &visited)
1059 || bb == loop->header)
1061 /* This block is now visited. */
1062 bitmap_set_bit (visited, bb->index);
1063 blocks[visited_count++] = bb;
1067 index++;
1069 if (index == loop->num_nodes
1070 && visited_count != loop->num_nodes)
1071 /* Not done yet. */
1072 index = 0;
1074 free (blocks_in_bfs_order);
1075 BITMAP_FREE (visited);
1076 return blocks;
1079 /* Returns true when the analysis of the predicates for all the basic
1080 blocks in LOOP succeeded.
1082 predicate_bbs first allocates the predicates of the basic blocks.
1083 These fields are then initialized with the tree expressions
1084 representing the predicates under which a basic block is executed
1085 in the LOOP. As the loop->header is executed at each iteration, it
1086 has the "true" predicate. Other statements executed under a
1087 condition are predicated with that condition, for example
1089 | if (x)
1090 | S1;
1091 | else
1092 | S2;
1094 S1 will be predicated with "x", and
1095 S2 will be predicated with "!x". */
1097 static void
1098 predicate_bbs (loop_p loop)
1100 unsigned int i;
1102 for (i = 0; i < loop->num_nodes; i++)
1103 init_bb_predicate (ifc_bbs[i]);
1105 for (i = 0; i < loop->num_nodes; i++)
1107 basic_block bb = ifc_bbs[i];
1108 tree cond;
1109 gimple *stmt;
1111 /* The loop latch and loop exit block are always executed and
1112 have no extra conditions to be processed: skip them. */
1113 if (bb == loop->latch
1114 || bb_with_exit_edge_p (loop, bb))
1116 reset_bb_predicate (bb);
1117 continue;
1120 cond = bb_predicate (bb);
1121 stmt = last_stmt (bb);
1122 if (stmt && gimple_code (stmt) == GIMPLE_COND)
1124 tree c2;
1125 edge true_edge, false_edge;
1126 location_t loc = gimple_location (stmt);
1127 tree c = build2_loc (loc, gimple_cond_code (stmt),
1128 boolean_type_node,
1129 gimple_cond_lhs (stmt),
1130 gimple_cond_rhs (stmt));
1132 /* Add new condition into destination's predicate list. */
1133 extract_true_false_edges_from_block (gimple_bb (stmt),
1134 &true_edge, &false_edge);
1136 /* If C is true, then TRUE_EDGE is taken. */
1137 add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1138 unshare_expr (c));
1140 /* If C is false, then FALSE_EDGE is taken. */
1141 c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1142 unshare_expr (c));
1143 add_to_dst_predicate_list (loop, false_edge,
1144 unshare_expr (cond), c2);
1146 cond = NULL_TREE;
1149 /* If current bb has only one successor, then consider it as an
1150 unconditional goto. */
1151 if (single_succ_p (bb))
1153 basic_block bb_n = single_succ (bb);
1155 /* The successor bb inherits the predicate of its
1156 predecessor. If there is no predicate in the predecessor
1157 bb, then consider the successor bb as always executed. */
1158 if (cond == NULL_TREE)
1159 cond = boolean_true_node;
1161 add_to_predicate_list (loop, bb_n, cond);
1165 /* The loop header is always executed. */
1166 reset_bb_predicate (loop->header);
1167 gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1168 && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1171 /* Return true when LOOP is if-convertible. This is a helper function
1172 for if_convertible_loop_p. REFS and DDRS are initialized and freed
1173 in if_convertible_loop_p. */
1175 static bool
1176 if_convertible_loop_p_1 (struct loop *loop,
1177 vec<data_reference_p> *refs,
1178 bool *any_mask_load_store)
1180 unsigned int i;
1181 basic_block exit_bb = NULL;
1183 if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
1184 return false;
1186 calculate_dominance_info (CDI_DOMINATORS);
1187 calculate_dominance_info (CDI_POST_DOMINATORS);
1189 /* Allow statements that can be handled during if-conversion. */
1190 ifc_bbs = get_loop_body_in_if_conv_order (loop);
1191 if (!ifc_bbs)
1193 if (dump_file && (dump_flags & TDF_DETAILS))
1194 fprintf (dump_file, "Irreducible loop\n");
1195 return false;
1198 for (i = 0; i < loop->num_nodes; i++)
1200 basic_block bb = ifc_bbs[i];
1202 if (!if_convertible_bb_p (loop, bb, exit_bb))
1203 return false;
1205 if (bb_with_exit_edge_p (loop, bb))
1206 exit_bb = bb;
1209 for (i = 0; i < loop->num_nodes; i++)
1211 basic_block bb = ifc_bbs[i];
1212 gimple_stmt_iterator gsi;
1214 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1215 switch (gimple_code (gsi_stmt (gsi)))
1217 case GIMPLE_LABEL:
1218 case GIMPLE_ASSIGN:
1219 case GIMPLE_CALL:
1220 case GIMPLE_DEBUG:
1221 case GIMPLE_COND:
1222 gimple_set_uid (gsi_stmt (gsi), 0);
1223 break;
1224 default:
1225 return false;
1229 data_reference_p dr;
1231 ref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1232 baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1234 predicate_bbs (loop);
1236 for (i = 0; refs->iterate (i, &dr); i++)
1238 dr->aux = XNEW (struct ifc_dr);
1239 DR_BASE_W_UNCONDITIONALLY (dr) = false;
1240 DR_RW_UNCONDITIONALLY (dr) = false;
1241 DR_W_UNCONDITIONALLY (dr) = false;
1242 IFC_DR (dr)->rw_predicate = boolean_false_node;
1243 IFC_DR (dr)->w_predicate = boolean_false_node;
1244 IFC_DR (dr)->base_w_predicate = boolean_false_node;
1245 if (gimple_uid (DR_STMT (dr)) == 0)
1246 gimple_set_uid (DR_STMT (dr), i + 1);
1247 hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1250 for (i = 0; i < loop->num_nodes; i++)
1252 basic_block bb = ifc_bbs[i];
1253 gimple_stmt_iterator itr;
1255 /* Check the if-convertibility of statements in predicated BBs. */
1256 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1257 for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1258 if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
1259 any_mask_load_store))
1260 return false;
1263 for (i = 0; i < loop->num_nodes; i++)
1264 free_bb_predicate (ifc_bbs[i]);
1266 /* Checking PHIs needs to be done after stmts, as the fact whether there
1267 are any masked loads or stores affects the tests. */
1268 for (i = 0; i < loop->num_nodes; i++)
1270 basic_block bb = ifc_bbs[i];
1271 gphi_iterator itr;
1273 for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1274 if (!if_convertible_phi_p (loop, bb, itr.phi (),
1275 *any_mask_load_store))
1276 return false;
1279 if (dump_file)
1280 fprintf (dump_file, "Applying if-conversion\n");
1282 return true;
1285 /* Return true when LOOP is if-convertible.
1286 LOOP is if-convertible if:
1287 - it is innermost,
1288 - it has two or more basic blocks,
1289 - it has only one exit,
1290 - loop header is not the exit edge,
1291 - if its basic blocks and phi nodes are if convertible. */
1293 static bool
1294 if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store)
1296 edge e;
1297 edge_iterator ei;
1298 bool res = false;
1299 vec<data_reference_p> refs;
1301 /* Handle only innermost loop. */
1302 if (!loop || loop->inner)
1304 if (dump_file && (dump_flags & TDF_DETAILS))
1305 fprintf (dump_file, "not innermost loop\n");
1306 return false;
1309 /* If only one block, no need for if-conversion. */
1310 if (loop->num_nodes <= 2)
1312 if (dump_file && (dump_flags & TDF_DETAILS))
1313 fprintf (dump_file, "less than 2 basic blocks\n");
1314 return false;
1317 /* More than one loop exit is too much to handle. */
1318 if (!single_exit (loop))
1320 if (dump_file && (dump_flags & TDF_DETAILS))
1321 fprintf (dump_file, "multiple exits\n");
1322 return false;
1325 /* If one of the loop header's edge is an exit edge then do not
1326 apply if-conversion. */
1327 FOR_EACH_EDGE (e, ei, loop->header->succs)
1328 if (loop_exit_edge_p (loop, e))
1329 return false;
1331 refs.create (5);
1332 res = if_convertible_loop_p_1 (loop, &refs, any_mask_load_store);
1334 data_reference_p dr;
1335 unsigned int i;
1336 for (i = 0; refs.iterate (i, &dr); i++)
1337 free (dr->aux);
1339 free_data_refs (refs);
1341 delete ref_DR_map;
1342 ref_DR_map = NULL;
1344 delete baseref_DR_map;
1345 baseref_DR_map = NULL;
1347 return res;
1350 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1351 which is in predicated basic block.
1352 In fact, the following PHI pattern is searching:
1353 loop-header:
1354 reduc_1 = PHI <..., reduc_2>
1356 if (...)
1357 reduc_3 = ...
1358 reduc_2 = PHI <reduc_1, reduc_3>
1360 ARG_0 and ARG_1 are correspondent PHI arguments.
1361 REDUC, OP0 and OP1 contain reduction stmt and its operands.
1362 EXTENDED is true if PHI has > 2 arguments. */
1364 static bool
1365 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1366 tree *op0, tree *op1, bool extended)
1368 tree lhs, r_op1, r_op2;
1369 gimple *stmt;
1370 gimple *header_phi = NULL;
1371 enum tree_code reduction_op;
1372 basic_block bb = gimple_bb (phi);
1373 struct loop *loop = bb->loop_father;
1374 edge latch_e = loop_latch_edge (loop);
1375 imm_use_iterator imm_iter;
1376 use_operand_p use_p;
1377 edge e;
1378 edge_iterator ei;
1379 bool result = false;
1380 if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1381 return false;
1383 if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1385 lhs = arg_1;
1386 header_phi = SSA_NAME_DEF_STMT (arg_0);
1387 stmt = SSA_NAME_DEF_STMT (arg_1);
1389 else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1391 lhs = arg_0;
1392 header_phi = SSA_NAME_DEF_STMT (arg_1);
1393 stmt = SSA_NAME_DEF_STMT (arg_0);
1395 else
1396 return false;
1397 if (gimple_bb (header_phi) != loop->header)
1398 return false;
1400 if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1401 return false;
1403 if (gimple_code (stmt) != GIMPLE_ASSIGN
1404 || gimple_has_volatile_ops (stmt))
1405 return false;
1407 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1408 return false;
1410 if (!is_predicated (gimple_bb (stmt)))
1411 return false;
1413 /* Check that stmt-block is predecessor of phi-block. */
1414 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1415 if (e->dest == bb)
1417 result = true;
1418 break;
1420 if (!result)
1421 return false;
1423 if (!has_single_use (lhs))
1424 return false;
1426 reduction_op = gimple_assign_rhs_code (stmt);
1427 if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
1428 return false;
1429 r_op1 = gimple_assign_rhs1 (stmt);
1430 r_op2 = gimple_assign_rhs2 (stmt);
1432 /* Make R_OP1 to hold reduction variable. */
1433 if (r_op2 == PHI_RESULT (header_phi)
1434 && reduction_op == PLUS_EXPR)
1435 std::swap (r_op1, r_op2);
1436 else if (r_op1 != PHI_RESULT (header_phi))
1437 return false;
1439 /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1440 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1442 gimple *use_stmt = USE_STMT (use_p);
1443 if (is_gimple_debug (use_stmt))
1444 continue;
1445 if (use_stmt == stmt)
1446 continue;
1447 if (gimple_code (use_stmt) != GIMPLE_PHI)
1448 return false;
1451 *op0 = r_op1; *op1 = r_op2;
1452 *reduc = stmt;
1453 return true;
1456 /* Converts conditional scalar reduction into unconditional form, e.g.
1457 bb_4
1458 if (_5 != 0) goto bb_5 else goto bb_6
1459 end_bb_4
1460 bb_5
1461 res_6 = res_13 + 1;
1462 end_bb_5
1463 bb_6
1464 # res_2 = PHI <res_13(4), res_6(5)>
1465 end_bb_6
1467 will be converted into sequence
1468 _ifc__1 = _5 != 0 ? 1 : 0;
1469 res_2 = res_13 + _ifc__1;
1470 Argument SWAP tells that arguments of conditional expression should be
1471 swapped.
1472 Returns rhs of resulting PHI assignment. */
1474 static tree
1475 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1476 tree cond, tree op0, tree op1, bool swap)
1478 gimple_stmt_iterator stmt_it;
1479 gimple *new_assign;
1480 tree rhs;
1481 tree rhs1 = gimple_assign_rhs1 (reduc);
1482 tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1483 tree c;
1484 tree zero = build_zero_cst (TREE_TYPE (rhs1));
1486 if (dump_file && (dump_flags & TDF_DETAILS))
1488 fprintf (dump_file, "Found cond scalar reduction.\n");
1489 print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1492 /* Build cond expression using COND and constant operand
1493 of reduction rhs. */
1494 c = fold_build_cond_expr (TREE_TYPE (rhs1),
1495 unshare_expr (cond),
1496 swap ? zero : op1,
1497 swap ? op1 : zero);
1499 /* Create assignment stmt and insert it at GSI. */
1500 new_assign = gimple_build_assign (tmp, c);
1501 gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1502 /* Build rhs for unconditional increment/decrement. */
1503 rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1504 TREE_TYPE (rhs1), op0, tmp);
1506 /* Delete original reduction stmt. */
1507 stmt_it = gsi_for_stmt (reduc);
1508 gsi_remove (&stmt_it, true);
1509 release_defs (reduc);
1510 return rhs;
1513 /* Produce condition for all occurrences of ARG in PHI node. */
1515 static tree
1516 gen_phi_arg_condition (gphi *phi, vec<int> *occur,
1517 gimple_stmt_iterator *gsi)
1519 int len;
1520 int i;
1521 tree cond = NULL_TREE;
1522 tree c;
1523 edge e;
1525 len = occur->length ();
1526 gcc_assert (len > 0);
1527 for (i = 0; i < len; i++)
1529 e = gimple_phi_arg_edge (phi, (*occur)[i]);
1530 c = bb_predicate (e->src);
1531 if (is_true_predicate (c))
1532 continue;
1533 c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
1534 is_gimple_condexpr, NULL_TREE,
1535 true, GSI_SAME_STMT);
1536 if (cond != NULL_TREE)
1538 /* Must build OR expression. */
1539 cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
1540 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1541 is_gimple_condexpr, NULL_TREE,
1542 true, GSI_SAME_STMT);
1544 else
1545 cond = c;
1547 gcc_assert (cond != NULL_TREE);
1548 return cond;
1551 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1552 This routine can handle PHI nodes with more than two arguments.
1554 For example,
1555 S1: A = PHI <x1(1), x2(5)>
1556 is converted into,
1557 S2: A = cond ? x1 : x2;
1559 The generated code is inserted at GSI that points to the top of
1560 basic block's statement list.
1561 If PHI node has more than two arguments a chain of conditional
1562 expression is produced. */
1565 static void
1566 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1568 gimple *new_stmt = NULL, *reduc;
1569 tree rhs, res, arg0, arg1, op0, op1, scev;
1570 tree cond;
1571 unsigned int index0;
1572 unsigned int max, args_len;
1573 edge e;
1574 basic_block bb;
1575 unsigned int i;
1577 res = gimple_phi_result (phi);
1578 if (virtual_operand_p (res))
1579 return;
1581 if ((rhs = degenerate_phi_result (phi))
1582 || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1583 res))
1584 && !chrec_contains_undetermined (scev)
1585 && scev != res
1586 && (rhs = gimple_phi_arg_def (phi, 0))))
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, "Degenerate phi!\n");
1591 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1593 new_stmt = gimple_build_assign (res, rhs);
1594 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1595 update_stmt (new_stmt);
1596 return;
1599 bb = gimple_bb (phi);
1600 if (EDGE_COUNT (bb->preds) == 2)
1602 /* Predicate ordinary PHI node with 2 arguments. */
1603 edge first_edge, second_edge;
1604 basic_block true_bb;
1605 first_edge = EDGE_PRED (bb, 0);
1606 second_edge = EDGE_PRED (bb, 1);
1607 cond = bb_predicate (first_edge->src);
1608 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1609 std::swap (first_edge, second_edge);
1610 if (EDGE_COUNT (first_edge->src->succs) > 1)
1612 cond = bb_predicate (second_edge->src);
1613 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1614 cond = TREE_OPERAND (cond, 0);
1615 else
1616 first_edge = second_edge;
1618 else
1619 cond = bb_predicate (first_edge->src);
1620 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1621 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1622 is_gimple_condexpr, NULL_TREE,
1623 true, GSI_SAME_STMT);
1624 true_bb = first_edge->src;
1625 if (EDGE_PRED (bb, 1)->src == true_bb)
1627 arg0 = gimple_phi_arg_def (phi, 1);
1628 arg1 = gimple_phi_arg_def (phi, 0);
1630 else
1632 arg0 = gimple_phi_arg_def (phi, 0);
1633 arg1 = gimple_phi_arg_def (phi, 1);
1635 if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1636 &op0, &op1, false))
1637 /* Convert reduction stmt into vectorizable form. */
1638 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1639 true_bb != gimple_bb (reduc));
1640 else
1641 /* Build new RHS using selected condition and arguments. */
1642 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1643 arg0, arg1);
1644 new_stmt = gimple_build_assign (res, rhs);
1645 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1646 update_stmt (new_stmt);
1648 if (dump_file && (dump_flags & TDF_DETAILS))
1650 fprintf (dump_file, "new phi replacement stmt\n");
1651 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1653 return;
1656 /* Create hashmap for PHI node which contain vector of argument indexes
1657 having the same value. */
1658 bool swap = false;
1659 hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1660 unsigned int num_args = gimple_phi_num_args (phi);
1661 int max_ind = -1;
1662 /* Vector of different PHI argument values. */
1663 auto_vec<tree> args (num_args);
1665 /* Compute phi_arg_map. */
1666 for (i = 0; i < num_args; i++)
1668 tree arg;
1670 arg = gimple_phi_arg_def (phi, i);
1671 if (!phi_arg_map.get (arg))
1672 args.quick_push (arg);
1673 phi_arg_map.get_or_insert (arg).safe_push (i);
1676 /* Determine element with max number of occurrences. */
1677 max_ind = -1;
1678 max = 1;
1679 args_len = args.length ();
1680 for (i = 0; i < args_len; i++)
1682 unsigned int len;
1683 if ((len = phi_arg_map.get (args[i])->length ()) > max)
1685 max_ind = (int) i;
1686 max = len;
1690 /* Put element with max number of occurences to the end of ARGS. */
1691 if (max_ind != -1 && max_ind +1 != (int) args_len)
1692 std::swap (args[args_len - 1], args[max_ind]);
1694 /* Handle one special case when number of arguments with different values
1695 is equal 2 and one argument has the only occurrence. Such PHI can be
1696 handled as if would have only 2 arguments. */
1697 if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1699 vec<int> *indexes;
1700 indexes = phi_arg_map.get (args[0]);
1701 index0 = (*indexes)[0];
1702 arg0 = args[0];
1703 arg1 = args[1];
1704 e = gimple_phi_arg_edge (phi, index0);
1705 cond = bb_predicate (e->src);
1706 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1708 swap = true;
1709 cond = TREE_OPERAND (cond, 0);
1711 /* Gimplify the condition to a valid cond-expr conditonal operand. */
1712 cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1713 is_gimple_condexpr, NULL_TREE,
1714 true, GSI_SAME_STMT);
1715 if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1716 &op0, &op1, true)))
1717 rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1718 swap? arg1 : arg0,
1719 swap? arg0 : arg1);
1720 else
1721 /* Convert reduction stmt into vectorizable form. */
1722 rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1723 swap);
1724 new_stmt = gimple_build_assign (res, rhs);
1725 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1726 update_stmt (new_stmt);
1728 else
1730 /* Common case. */
1731 vec<int> *indexes;
1732 tree type = TREE_TYPE (gimple_phi_result (phi));
1733 tree lhs;
1734 arg1 = args[1];
1735 for (i = 0; i < args_len; i++)
1737 arg0 = args[i];
1738 indexes = phi_arg_map.get (args[i]);
1739 if (i != args_len - 1)
1740 lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1741 else
1742 lhs = res;
1743 cond = gen_phi_arg_condition (phi, indexes, gsi);
1744 rhs = fold_build_cond_expr (type, unshare_expr (cond),
1745 arg0, arg1);
1746 new_stmt = gimple_build_assign (lhs, rhs);
1747 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1748 update_stmt (new_stmt);
1749 arg1 = lhs;
1753 if (dump_file && (dump_flags & TDF_DETAILS))
1755 fprintf (dump_file, "new extended phi replacement stmt\n");
1756 print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1760 /* Replaces in LOOP all the scalar phi nodes other than those in the
1761 LOOP->header block with conditional modify expressions. */
1763 static void
1764 predicate_all_scalar_phis (struct loop *loop)
1766 basic_block bb;
1767 unsigned int orig_loop_num_nodes = loop->num_nodes;
1768 unsigned int i;
1770 for (i = 1; i < orig_loop_num_nodes; i++)
1772 gphi *phi;
1773 gimple_stmt_iterator gsi;
1774 gphi_iterator phi_gsi;
1775 bb = ifc_bbs[i];
1777 if (bb == loop->header)
1778 continue;
1780 if (EDGE_COUNT (bb->preds) == 1)
1781 continue;
1783 phi_gsi = gsi_start_phis (bb);
1784 if (gsi_end_p (phi_gsi))
1785 continue;
1787 gsi = gsi_after_labels (bb);
1788 while (!gsi_end_p (phi_gsi))
1790 phi = phi_gsi.phi ();
1791 predicate_scalar_phi (phi, &gsi);
1792 release_phi_node (phi);
1793 gsi_next (&phi_gsi);
1796 set_phi_nodes (bb, NULL);
1800 /* Insert in each basic block of LOOP the statements produced by the
1801 gimplification of the predicates. */
1803 static void
1804 insert_gimplified_predicates (loop_p loop, bool any_mask_load_store)
1806 unsigned int i;
1808 for (i = 0; i < loop->num_nodes; i++)
1810 basic_block bb = ifc_bbs[i];
1811 gimple_seq stmts;
1812 if (!is_predicated (bb))
1813 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
1814 if (!is_predicated (bb))
1816 /* Do not insert statements for a basic block that is not
1817 predicated. Also make sure that the predicate of the
1818 basic block is set to true. */
1819 reset_bb_predicate (bb);
1820 continue;
1823 stmts = bb_predicate_gimplified_stmts (bb);
1824 if (stmts)
1826 if (any_mask_load_store)
1828 /* Insert the predicate of the BB just after the label,
1829 as the if-conversion of memory writes will use this
1830 predicate. */
1831 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1832 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1834 else
1836 /* Insert the predicate of the BB at the end of the BB
1837 as this would reduce the register pressure: the only
1838 use of this predicate will be in successor BBs. */
1839 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1841 if (gsi_end_p (gsi)
1842 || stmt_ends_bb_p (gsi_stmt (gsi)))
1843 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1844 else
1845 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1848 /* Once the sequence is code generated, set it to NULL. */
1849 set_bb_predicate_gimplified_stmts (bb, NULL);
1854 /* Helper function for predicate_mem_writes. Returns index of existent
1855 mask if it was created for given SIZE and -1 otherwise. */
1857 static int
1858 mask_exists (int size, vec<int> vec)
1860 unsigned int ix;
1861 int v;
1862 FOR_EACH_VEC_ELT (vec, ix, v)
1863 if (v == size)
1864 return (int) ix;
1865 return -1;
1868 /* Predicate each write to memory in LOOP.
1870 This function transforms control flow constructs containing memory
1871 writes of the form:
1873 | for (i = 0; i < N; i++)
1874 | if (cond)
1875 | A[i] = expr;
1877 into the following form that does not contain control flow:
1879 | for (i = 0; i < N; i++)
1880 | A[i] = cond ? expr : A[i];
1882 The original CFG looks like this:
1884 | bb_0
1885 | i = 0
1886 | end_bb_0
1888 | bb_1
1889 | if (i < N) goto bb_5 else goto bb_2
1890 | end_bb_1
1892 | bb_2
1893 | cond = some_computation;
1894 | if (cond) goto bb_3 else goto bb_4
1895 | end_bb_2
1897 | bb_3
1898 | A[i] = expr;
1899 | goto bb_4
1900 | end_bb_3
1902 | bb_4
1903 | goto bb_1
1904 | end_bb_4
1906 insert_gimplified_predicates inserts the computation of the COND
1907 expression at the beginning of the destination basic block:
1909 | bb_0
1910 | i = 0
1911 | end_bb_0
1913 | bb_1
1914 | if (i < N) goto bb_5 else goto bb_2
1915 | end_bb_1
1917 | bb_2
1918 | cond = some_computation;
1919 | if (cond) goto bb_3 else goto bb_4
1920 | end_bb_2
1922 | bb_3
1923 | cond = some_computation;
1924 | A[i] = expr;
1925 | goto bb_4
1926 | end_bb_3
1928 | bb_4
1929 | goto bb_1
1930 | end_bb_4
1932 predicate_mem_writes is then predicating the memory write as follows:
1934 | bb_0
1935 | i = 0
1936 | end_bb_0
1938 | bb_1
1939 | if (i < N) goto bb_5 else goto bb_2
1940 | end_bb_1
1942 | bb_2
1943 | if (cond) goto bb_3 else goto bb_4
1944 | end_bb_2
1946 | bb_3
1947 | cond = some_computation;
1948 | A[i] = cond ? expr : A[i];
1949 | goto bb_4
1950 | end_bb_3
1952 | bb_4
1953 | goto bb_1
1954 | end_bb_4
1956 and finally combine_blocks removes the basic block boundaries making
1957 the loop vectorizable:
1959 | bb_0
1960 | i = 0
1961 | if (i < N) goto bb_5 else goto bb_1
1962 | end_bb_0
1964 | bb_1
1965 | cond = some_computation;
1966 | A[i] = cond ? expr : A[i];
1967 | if (i < N) goto bb_5 else goto bb_4
1968 | end_bb_1
1970 | bb_4
1971 | goto bb_1
1972 | end_bb_4
1975 static void
1976 predicate_mem_writes (loop_p loop)
1978 unsigned int i, orig_loop_num_nodes = loop->num_nodes;
1979 auto_vec<int, 1> vect_sizes;
1980 auto_vec<tree, 1> vect_masks;
1982 for (i = 1; i < orig_loop_num_nodes; i++)
1984 gimple_stmt_iterator gsi;
1985 basic_block bb = ifc_bbs[i];
1986 tree cond = bb_predicate (bb);
1987 bool swap;
1988 gimple *stmt;
1989 int index;
1991 if (is_true_predicate (cond))
1992 continue;
1994 swap = false;
1995 if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1997 swap = true;
1998 cond = TREE_OPERAND (cond, 0);
2001 vect_sizes.truncate (0);
2002 vect_masks.truncate (0);
2004 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2005 if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
2006 continue;
2007 else if (gimple_plf (stmt, GF_PLF_2))
2009 tree lhs = gimple_assign_lhs (stmt);
2010 tree rhs = gimple_assign_rhs1 (stmt);
2011 tree ref, addr, ptr, mask;
2012 gimple *new_stmt;
2013 gimple_seq stmts = NULL;
2014 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
2015 ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2016 mark_addressable (ref);
2017 addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
2018 true, NULL_TREE, true,
2019 GSI_SAME_STMT);
2020 if (!vect_sizes.is_empty ()
2021 && (index = mask_exists (bitsize, vect_sizes)) != -1)
2022 /* Use created mask. */
2023 mask = vect_masks[index];
2024 else
2026 if (COMPARISON_CLASS_P (cond))
2027 mask = gimple_build (&stmts, TREE_CODE (cond),
2028 boolean_type_node,
2029 TREE_OPERAND (cond, 0),
2030 TREE_OPERAND (cond, 1));
2031 else
2033 gcc_assert (TREE_CODE (cond) == SSA_NAME);
2034 mask = cond;
2037 if (swap)
2039 tree true_val
2040 = constant_boolean_node (true, TREE_TYPE (mask));
2041 mask = gimple_build (&stmts, BIT_XOR_EXPR,
2042 TREE_TYPE (mask), mask, true_val);
2044 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2046 mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
2047 /* Save mask and its size for further use. */
2048 vect_sizes.safe_push (bitsize);
2049 vect_masks.safe_push (mask);
2051 ptr = build_int_cst (reference_alias_ptr_type (ref),
2052 get_object_alignment (ref));
2053 /* Copy points-to info if possible. */
2054 if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2055 copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2056 ref);
2057 if (TREE_CODE (lhs) == SSA_NAME)
2059 new_stmt
2060 = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
2061 ptr, mask);
2062 gimple_call_set_lhs (new_stmt, lhs);
2064 else
2065 new_stmt
2066 = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2067 mask, rhs);
2068 gsi_replace (&gsi, new_stmt, true);
2070 else if (gimple_vdef (stmt))
2072 tree lhs = gimple_assign_lhs (stmt);
2073 tree rhs = gimple_assign_rhs1 (stmt);
2074 tree type = TREE_TYPE (lhs);
2076 lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
2077 rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
2078 if (swap)
2079 std::swap (lhs, rhs);
2080 cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
2081 is_gimple_condexpr, NULL_TREE,
2082 true, GSI_SAME_STMT);
2083 rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
2084 gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
2085 update_stmt (stmt);
2090 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2091 other than the exit and latch of the LOOP. Also resets the
2092 GIMPLE_DEBUG information. */
2094 static void
2095 remove_conditions_and_labels (loop_p loop)
2097 gimple_stmt_iterator gsi;
2098 unsigned int i;
2100 for (i = 0; i < loop->num_nodes; i++)
2102 basic_block bb = ifc_bbs[i];
2104 if (bb_with_exit_edge_p (loop, bb)
2105 || bb == loop->latch)
2106 continue;
2108 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2109 switch (gimple_code (gsi_stmt (gsi)))
2111 case GIMPLE_COND:
2112 case GIMPLE_LABEL:
2113 gsi_remove (&gsi, true);
2114 break;
2116 case GIMPLE_DEBUG:
2117 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2118 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2120 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2121 update_stmt (gsi_stmt (gsi));
2123 gsi_next (&gsi);
2124 break;
2126 default:
2127 gsi_next (&gsi);
2132 /* Combine all the basic blocks from LOOP into one or two super basic
2133 blocks. Replace PHI nodes with conditional modify expressions. */
2135 static void
2136 combine_blocks (struct loop *loop, bool any_mask_load_store)
2138 basic_block bb, exit_bb, merge_target_bb;
2139 unsigned int orig_loop_num_nodes = loop->num_nodes;
2140 unsigned int i;
2141 edge e;
2142 edge_iterator ei;
2144 predicate_bbs (loop);
2145 remove_conditions_and_labels (loop);
2146 insert_gimplified_predicates (loop, any_mask_load_store);
2147 predicate_all_scalar_phis (loop);
2149 if (any_mask_load_store)
2150 predicate_mem_writes (loop);
2152 /* Merge basic blocks: first remove all the edges in the loop,
2153 except for those from the exit block. */
2154 exit_bb = NULL;
2155 bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
2156 for (i = 0; i < orig_loop_num_nodes; i++)
2158 bb = ifc_bbs[i];
2159 predicated[i] = !is_true_predicate (bb_predicate (bb));
2160 free_bb_predicate (bb);
2161 if (bb_with_exit_edge_p (loop, bb))
2163 gcc_assert (exit_bb == NULL);
2164 exit_bb = bb;
2167 gcc_assert (exit_bb != loop->latch);
2169 for (i = 1; i < orig_loop_num_nodes; i++)
2171 bb = ifc_bbs[i];
2173 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
2175 if (e->src == exit_bb)
2176 ei_next (&ei);
2177 else
2178 remove_edge (e);
2182 if (exit_bb != NULL)
2184 if (exit_bb != loop->header)
2186 /* Connect this node to loop header. */
2187 make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
2188 set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
2191 /* Redirect non-exit edges to loop->latch. */
2192 FOR_EACH_EDGE (e, ei, exit_bb->succs)
2194 if (!loop_exit_edge_p (loop, e))
2195 redirect_edge_and_branch (e, loop->latch);
2197 set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
2199 else
2201 /* If the loop does not have an exit, reconnect header and latch. */
2202 make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
2203 set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
2206 merge_target_bb = loop->header;
2207 for (i = 1; i < orig_loop_num_nodes; i++)
2209 gimple_stmt_iterator gsi;
2210 gimple_stmt_iterator last;
2212 bb = ifc_bbs[i];
2214 if (bb == exit_bb || bb == loop->latch)
2215 continue;
2217 /* Make stmts member of loop->header and clear range info from all stmts
2218 in BB which is now no longer executed conditional on a predicate we
2219 could have derived it from. */
2220 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2222 gimple *stmt = gsi_stmt (gsi);
2223 gimple_set_bb (stmt, merge_target_bb);
2224 if (predicated[i])
2226 ssa_op_iter i;
2227 tree op;
2228 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
2229 reset_flow_sensitive_info (op);
2233 /* Update stmt list. */
2234 last = gsi_last_bb (merge_target_bb);
2235 gsi_insert_seq_after (&last, bb_seq (bb), GSI_NEW_STMT);
2236 set_bb_seq (bb, NULL);
2238 delete_basic_block (bb);
2241 /* If possible, merge loop header to the block with the exit edge.
2242 This reduces the number of basic blocks to two, to please the
2243 vectorizer that handles only loops with two nodes. */
2244 if (exit_bb
2245 && exit_bb != loop->header
2246 && can_merge_blocks_p (loop->header, exit_bb))
2247 merge_blocks (loop->header, exit_bb);
2249 free (ifc_bbs);
2250 ifc_bbs = NULL;
2251 free (predicated);
2254 /* Version LOOP before if-converting it; the original loop
2255 will be if-converted, the new copy of the loop will not,
2256 and the LOOP_VECTORIZED internal call will be guarding which
2257 loop to execute. The vectorizer pass will fold this
2258 internal call into either true or false. */
2260 static bool
2261 version_loop_for_if_conversion (struct loop *loop)
2263 basic_block cond_bb;
2264 tree cond = make_ssa_name (boolean_type_node);
2265 struct loop *new_loop;
2266 gimple *g;
2267 gimple_stmt_iterator gsi;
2269 g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
2270 build_int_cst (integer_type_node, loop->num),
2271 integer_zero_node);
2272 gimple_call_set_lhs (g, cond);
2274 initialize_original_copy_tables ();
2275 new_loop = loop_version (loop, cond, &cond_bb,
2276 REG_BR_PROB_BASE, REG_BR_PROB_BASE,
2277 REG_BR_PROB_BASE, true);
2278 free_original_copy_tables ();
2279 if (new_loop == NULL)
2280 return false;
2281 new_loop->dont_vectorize = true;
2282 new_loop->force_vectorize = false;
2283 gsi = gsi_last_bb (cond_bb);
2284 gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
2285 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2286 update_ssa (TODO_update_ssa);
2287 return true;
2290 /* Performs splitting of critical edges if aggressive_if_conv is true.
2291 Returns false if loop won't be if converted and true otherwise. */
2293 static bool
2294 ifcvt_split_critical_edges (struct loop *loop)
2296 basic_block *body;
2297 basic_block bb;
2298 unsigned int num = loop->num_nodes;
2299 unsigned int i;
2300 gimple *stmt;
2301 edge e;
2302 edge_iterator ei;
2304 if (num <= 2)
2305 return false;
2306 if (loop->inner)
2307 return false;
2308 if (!single_exit (loop))
2309 return false;
2311 body = get_loop_body (loop);
2312 for (i = 0; i < num; i++)
2314 bb = body[i];
2315 if (bb == loop->latch
2316 || bb_with_exit_edge_p (loop, bb))
2317 continue;
2318 stmt = last_stmt (bb);
2319 /* Skip basic blocks not ending with conditional branch. */
2320 if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
2321 continue;
2322 FOR_EACH_EDGE (e, ei, bb->succs)
2323 if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
2324 split_edge (e);
2326 free (body);
2327 return true;
2330 /* Assumes that lhs of DEF_STMT have multiple uses.
2331 Delete one use by (1) creation of copy DEF_STMT with
2332 unique lhs; (2) change original use of lhs in one
2333 use statement with newly created lhs. */
2335 static void
2336 ifcvt_split_def_stmt (gimple *def_stmt, gimple *use_stmt)
2338 tree var;
2339 tree lhs;
2340 gimple *copy_stmt;
2341 gimple_stmt_iterator gsi;
2342 use_operand_p use_p;
2343 imm_use_iterator imm_iter;
2345 var = gimple_assign_lhs (def_stmt);
2346 copy_stmt = gimple_copy (def_stmt);
2347 lhs = make_temp_ssa_name (TREE_TYPE (var), NULL, "_ifc_");
2348 gimple_assign_set_lhs (copy_stmt, lhs);
2349 SSA_NAME_DEF_STMT (lhs) = copy_stmt;
2350 /* Insert copy of DEF_STMT. */
2351 gsi = gsi_for_stmt (def_stmt);
2352 gsi_insert_after (&gsi, copy_stmt, GSI_SAME_STMT);
2353 /* Change use of var to lhs in use_stmt. */
2354 if (dump_file && (dump_flags & TDF_DETAILS))
2356 fprintf (dump_file, "Change use of var ");
2357 print_generic_expr (dump_file, var, TDF_SLIM);
2358 fprintf (dump_file, " to ");
2359 print_generic_expr (dump_file, lhs, TDF_SLIM);
2360 fprintf (dump_file, "\n");
2362 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2364 if (USE_STMT (use_p) != use_stmt)
2365 continue;
2366 SET_USE (use_p, lhs);
2367 break;
2371 /* Traverse bool pattern recursively starting from VAR.
2372 Save its def and use statements to defuse_list if VAR does
2373 not have single use. */
2375 static void
2376 ifcvt_walk_pattern_tree (tree var, vec<gimple *> *defuse_list,
2377 gimple *use_stmt)
2379 tree rhs1, rhs2;
2380 enum tree_code code;
2381 gimple *def_stmt;
2383 def_stmt = SSA_NAME_DEF_STMT (var);
2384 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2385 return;
2386 if (!has_single_use (var))
2388 /* Put def and use stmts into defuse_list. */
2389 defuse_list->safe_push (def_stmt);
2390 defuse_list->safe_push (use_stmt);
2391 if (dump_file && (dump_flags & TDF_DETAILS))
2393 fprintf (dump_file, "Multiple lhs uses in stmt\n");
2394 print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
2397 rhs1 = gimple_assign_rhs1 (def_stmt);
2398 code = gimple_assign_rhs_code (def_stmt);
2399 switch (code)
2401 case SSA_NAME:
2402 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2403 break;
2404 CASE_CONVERT:
2405 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2406 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2407 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2408 break;
2409 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2410 break;
2411 case BIT_NOT_EXPR:
2412 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2413 break;
2414 case BIT_AND_EXPR:
2415 case BIT_IOR_EXPR:
2416 case BIT_XOR_EXPR:
2417 ifcvt_walk_pattern_tree (rhs1, defuse_list, def_stmt);
2418 rhs2 = gimple_assign_rhs2 (def_stmt);
2419 ifcvt_walk_pattern_tree (rhs2, defuse_list, def_stmt);
2420 break;
2421 default:
2422 break;
2424 return;
2427 /* Returns true if STMT can be a root of bool pattern applied
2428 by vectorizer. */
2430 static bool
2431 stmt_is_root_of_bool_pattern (gimple *stmt)
2433 enum tree_code code;
2434 tree lhs, rhs;
2436 code = gimple_assign_rhs_code (stmt);
2437 if (CONVERT_EXPR_CODE_P (code))
2439 lhs = gimple_assign_lhs (stmt);
2440 rhs = gimple_assign_rhs1 (stmt);
2441 if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2442 return false;
2443 if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
2444 return false;
2445 return true;
2447 else if (code == COND_EXPR)
2449 rhs = gimple_assign_rhs1 (stmt);
2450 if (TREE_CODE (rhs) != SSA_NAME)
2451 return false;
2452 return true;
2454 return false;
2457 /* Traverse all statements in BB which correspond to loop header to
2458 find out all statements which can start bool pattern applied by
2459 vectorizer and convert multiple uses in it to conform pattern
2460 restrictions. Such case can occur if the same predicate is used both
2461 for phi node conversion and load/store mask. */
2463 static void
2464 ifcvt_repair_bool_pattern (basic_block bb)
2466 tree rhs;
2467 gimple *stmt;
2468 gimple_stmt_iterator gsi;
2469 vec<gimple *> defuse_list = vNULL;
2470 vec<gimple *> pattern_roots = vNULL;
2471 bool repeat = true;
2472 int niter = 0;
2473 unsigned int ix;
2475 /* Collect all root pattern statements. */
2476 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2478 stmt = gsi_stmt (gsi);
2479 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2480 continue;
2481 if (!stmt_is_root_of_bool_pattern (stmt))
2482 continue;
2483 pattern_roots.safe_push (stmt);
2486 if (pattern_roots.is_empty ())
2487 return;
2489 /* Split all statements with multiple uses iteratively since splitting
2490 may create new multiple uses. */
2491 while (repeat)
2493 repeat = false;
2494 niter++;
2495 FOR_EACH_VEC_ELT (pattern_roots, ix, stmt)
2497 rhs = gimple_assign_rhs1 (stmt);
2498 ifcvt_walk_pattern_tree (rhs, &defuse_list, stmt);
2499 while (defuse_list.length () > 0)
2501 repeat = true;
2502 gimple *def_stmt, *use_stmt;
2503 use_stmt = defuse_list.pop ();
2504 def_stmt = defuse_list.pop ();
2505 ifcvt_split_def_stmt (def_stmt, use_stmt);
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2511 fprintf (dump_file, "Repair bool pattern takes %d iterations. \n",
2512 niter);
2515 /* Delete redundant statements produced by predication which prevents
2516 loop vectorization. */
2518 static void
2519 ifcvt_local_dce (basic_block bb)
2521 gimple *stmt;
2522 gimple *stmt1;
2523 gimple *phi;
2524 gimple_stmt_iterator gsi;
2525 auto_vec<gimple *> worklist;
2526 enum gimple_code code;
2527 use_operand_p use_p;
2528 imm_use_iterator imm_iter;
2530 worklist.create (64);
2531 /* Consider all phi as live statements. */
2532 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2534 phi = gsi_stmt (gsi);
2535 gimple_set_plf (phi, GF_PLF_2, true);
2536 worklist.safe_push (phi);
2538 /* Consider load/store statements, CALL and COND as live. */
2539 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2541 stmt = gsi_stmt (gsi);
2542 if (gimple_store_p (stmt)
2543 || gimple_assign_load_p (stmt)
2544 || is_gimple_debug (stmt))
2546 gimple_set_plf (stmt, GF_PLF_2, true);
2547 worklist.safe_push (stmt);
2548 continue;
2550 code = gimple_code (stmt);
2551 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2553 gimple_set_plf (stmt, GF_PLF_2, true);
2554 worklist.safe_push (stmt);
2555 continue;
2557 gimple_set_plf (stmt, GF_PLF_2, false);
2559 if (code == GIMPLE_ASSIGN)
2561 tree lhs = gimple_assign_lhs (stmt);
2562 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2564 stmt1 = USE_STMT (use_p);
2565 if (gimple_bb (stmt1) != bb)
2567 gimple_set_plf (stmt, GF_PLF_2, true);
2568 worklist.safe_push (stmt);
2569 break;
2574 /* Propagate liveness through arguments of live stmt. */
2575 while (worklist.length () > 0)
2577 ssa_op_iter iter;
2578 use_operand_p use_p;
2579 tree use;
2581 stmt = worklist.pop ();
2582 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2584 use = USE_FROM_PTR (use_p);
2585 if (TREE_CODE (use) != SSA_NAME)
2586 continue;
2587 stmt1 = SSA_NAME_DEF_STMT (use);
2588 if (gimple_bb (stmt1) != bb
2589 || gimple_plf (stmt1, GF_PLF_2))
2590 continue;
2591 gimple_set_plf (stmt1, GF_PLF_2, true);
2592 worklist.safe_push (stmt1);
2595 /* Delete dead statements. */
2596 gsi = gsi_start_bb (bb);
2597 while (!gsi_end_p (gsi))
2599 stmt = gsi_stmt (gsi);
2600 if (gimple_plf (stmt, GF_PLF_2))
2602 gsi_next (&gsi);
2603 continue;
2605 if (dump_file && (dump_flags & TDF_DETAILS))
2607 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2608 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2610 gsi_remove (&gsi, true);
2611 release_defs (stmt);
2615 /* If-convert LOOP when it is legal. For the moment this pass has no
2616 profitability analysis. Returns non-zero todo flags when something
2617 changed. */
2619 static unsigned int
2620 tree_if_conversion (struct loop *loop)
2622 unsigned int todo = 0;
2623 ifc_bbs = NULL;
2624 bool any_mask_load_store = false;
2626 /* Set up aggressive if-conversion for loops marked with simd pragma. */
2627 aggressive_if_conv = loop->force_vectorize;
2628 /* Check either outer loop was marked with simd pragma. */
2629 if (!aggressive_if_conv)
2631 struct loop *outer_loop = loop_outer (loop);
2632 if (outer_loop && outer_loop->force_vectorize)
2633 aggressive_if_conv = true;
2636 if (aggressive_if_conv)
2637 if (!ifcvt_split_critical_edges (loop))
2638 goto cleanup;
2640 if (!if_convertible_loop_p (loop, &any_mask_load_store)
2641 || !dbg_cnt (if_conversion_tree))
2642 goto cleanup;
2644 if (any_mask_load_store
2645 && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
2646 || loop->dont_vectorize))
2647 goto cleanup;
2649 if (any_mask_load_store && !version_loop_for_if_conversion (loop))
2650 goto cleanup;
2652 /* Now all statements are if-convertible. Combine all the basic
2653 blocks into one huge basic block doing the if-conversion
2654 on-the-fly. */
2655 combine_blocks (loop, any_mask_load_store);
2657 /* Delete dead predicate computations and repair tree correspondent
2658 to bool pattern to delete multiple uses of predicates. */
2659 if (aggressive_if_conv)
2661 ifcvt_local_dce (loop->header);
2662 ifcvt_repair_bool_pattern (loop->header);
2665 todo |= TODO_cleanup_cfg;
2666 if (any_mask_load_store)
2668 mark_virtual_operands_for_renaming (cfun);
2669 todo |= TODO_update_ssa_only_virtuals;
2672 cleanup:
2673 if (ifc_bbs)
2675 unsigned int i;
2677 for (i = 0; i < loop->num_nodes; i++)
2678 free_bb_predicate (ifc_bbs[i]);
2680 free (ifc_bbs);
2681 ifc_bbs = NULL;
2683 free_dominance_info (CDI_POST_DOMINATORS);
2685 return todo;
2688 /* Tree if-conversion pass management. */
2690 namespace {
2692 const pass_data pass_data_if_conversion =
2694 GIMPLE_PASS, /* type */
2695 "ifcvt", /* name */
2696 OPTGROUP_NONE, /* optinfo_flags */
2697 TV_NONE, /* tv_id */
2698 ( PROP_cfg | PROP_ssa ), /* properties_required */
2699 0, /* properties_provided */
2700 0, /* properties_destroyed */
2701 0, /* todo_flags_start */
2702 0, /* todo_flags_finish */
2705 class pass_if_conversion : public gimple_opt_pass
2707 public:
2708 pass_if_conversion (gcc::context *ctxt)
2709 : gimple_opt_pass (pass_data_if_conversion, ctxt)
2712 /* opt_pass methods: */
2713 virtual bool gate (function *);
2714 virtual unsigned int execute (function *);
2716 }; // class pass_if_conversion
2718 bool
2719 pass_if_conversion::gate (function *fun)
2721 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2722 && flag_tree_loop_if_convert != 0)
2723 || flag_tree_loop_if_convert == 1
2724 || flag_tree_loop_if_convert_stores == 1);
2727 unsigned int
2728 pass_if_conversion::execute (function *fun)
2730 struct loop *loop;
2731 unsigned todo = 0;
2733 if (number_of_loops (fun) <= 1)
2734 return 0;
2736 FOR_EACH_LOOP (loop, 0)
2737 if (flag_tree_loop_if_convert == 1
2738 || flag_tree_loop_if_convert_stores == 1
2739 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2740 && !loop->dont_vectorize))
2741 todo |= tree_if_conversion (loop);
2743 if (flag_checking)
2745 basic_block bb;
2746 FOR_EACH_BB_FN (bb, fun)
2747 gcc_assert (!bb->aux);
2750 return todo;
2753 } // anon namespace
2755 gimple_opt_pass *
2756 make_pass_if_conversion (gcc::context *ctxt)
2758 return new pass_if_conversion (ctxt);