/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
#include "gimple-fold.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
	rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
	{
	  if (!rename_from_outer_loop)
	    continue;
	  if (e->src != outer_loop->header)
	    {
	      if (outer_loop->inner->next)
		{
		  /* If outer_loop has 2 inner loops, allow there to
		     be an extra basic block which decides which of the
		     two loops to use using LOOP_VECTORIZED.  */
		  if (!single_pred_p (e->src)
		      || single_pred (e->src) != outer_loop->header)
		    continue;
		}
	    }
	}
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
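
/* A hypothetical illustration of the LIFO requirement (the names x_1, x_5
   and x_9 are made up for this sketch and do not come from the code below):
   if x_1 is first scheduled to be rewritten to x_5 relative to BB5 and later
   to x_9 relative to BB9, popping the stack LIFO applies the x_9 substitution
   first, so debug uses dominated by BB9 are never left pointing at the
   soon-to-be non-dominant x_5.  */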
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    gimple_debug_bind_reset_value (stmt);
	}
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_BIND_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_BIND_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
	adjust_vec.safe_push (ai);
      else
	adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_BIND_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
			gimple_bb (update_phi));
}
/* Make LOOP iterate N == (NITERS - STEP) / STEP + 1 times,
   where NITERS is known to be outside the range [1, STEP - 1].
   This is equivalent to making the loop execute NITERS / STEP
   times when NITERS is nonzero and (1 << M) / STEP times otherwise,
   where M is the precision of NITERS.

   NITERS_MAYBE_ZERO is true if NITERS can be zero, false if it is known
   to be >= STEP.  In the latter case N is always NITERS / STEP.

   If FINAL_IV is nonnull, it is an SSA name that should be set to
   N * STEP on exit from the loop.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
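
/* A worked example of the scheme above (illustrative values only): with
   NITERS = 10 and STEP = 4, N = (10 - 4) / 4 + 1 = 2, which matches
   NITERS / STEP = 10 / 4 = 2 under truncating division; if FINAL_IV is
   supplied it ends up holding N * STEP = 8 on exit from the loop.  */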
static void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters, tree step,
				 tree final_iv, bool niters_maybe_zero)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge pe = loop_preheader_edge (loop);
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  source_location loop_loc;
  tree init, limit;
  enum tree_code code;
  tree niters_type = TREE_TYPE (niters);

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  if (!niters_maybe_zero && integer_onep (step))
    {
      /* In this case we can use a simple 0-based IV:

	 A:
	   x = 0;
	   do
	     ...
	   while (x < NITERS);  */
      code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
      init = build_zero_cst (niters_type);
      limit = niters;
    }
  else
    {
      /* The following works for all values of NITERS except 0:

	 B:
	   x = 0;
	   do
	     ...
	   while (x <= NITERS - STEP);

	 so that the loop continues to iterate if x + STEP - 1 < NITERS
	 but stops if x + STEP - 1 >= NITERS.

	 However, if NITERS is zero, x never hits a value above NITERS - STEP
	 before wrapping around.  There are two obvious ways of dealing with
	 this:

	 - start at STEP - 1 and compare x before incrementing it
	 - start at -1 and compare x after incrementing it

	 The latter is simpler and is what we use.  The loop in this case
	 looks like:

	 C:
	   x = -1;
	   do
	     ...
	   while (x < NITERS - STEP);

	 In both cases the loop limit is NITERS - STEP.  */
      gimple_seq seq = NULL;
      limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
      limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
      if (seq)
	{
	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}

      if (niters_maybe_zero)
	{
	  /* Case C.  */
	  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
	  init = build_all_ones_cst (niters_type);
	}
      else
	{
	  /* Case B.  */
	  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
	  init = build_zero_cst (niters_type);
	}
    }

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
	     &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
				    true, GSI_SAME_STMT);

  cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
				 NULL_TREE);
  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
		     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  if (limit == niters)
    /* Case A: the loop iterates NITERS times.  Subtract one to get the
       latch count.  */
    loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
				       build_int_cst (niters_type, 1));
  else
    /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
       Subtract one from this to get the latch count.  */
    loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
				       limit, step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
					     indx_after_incr, init);
      gsi_insert_on_edge_immediate (single_exit (loop), assign);
    }
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest from those
   edges ensure that TO->dest PHI arguments have current_def
   to that in from.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
	{
	  gsi_next (&gsi_from);
	  continue;
	}
      if (virtual_operand_p (to_arg))
	{
	  gsi_next (&gsi_to);
	  continue;
	}
      if (TREE_CODE (from_arg) != SSA_NAME)
	gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
	{
	  if (get_current_def (to_arg) == NULL_TREE)
	    set_current_def (to_arg, get_current_def (from_arg));
	}
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi && to_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
		     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */
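
/* A rough sketch of the two placements (illustration only, not generated
   output): placing the copy on the exit edge gives
     preheader -> LOOP -> copy -> old exit destination
   while placing it on the preheader edge gives
     preheader -> copy -> LOOP -> exit,
   with one of the temporary forwarder blocks kept as the pre-header of the
   second loop in the chain.  */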
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
					struct loop *scalar_loop, edge e)

  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  basic_block exit_dest;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))

  if (scalar_loop == NULL)

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
					  exit_dest) == loop->header ?
		 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);

  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
	    &exit, 1, &new_exit, NULL,
	    at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
	 header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
						single_exit (loop));
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch, 0),
						EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */

      if (scalar_loop != loop)
	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);

	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
	       gsi_next (&gsi))

	      gphi *phi = gsi.phi ();
	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
	      location_t orig_locus
		= gimple_phi_arg_location_from_edge (phi, e);

	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);

      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
			  single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
			       loop_preheader_edge (scalar_loop)->src);

  else /* Add the copy at entry.  */

      if (scalar_loop != loop)
	  /* Remove the non-necessary forwarder of scalar_loop again.  */
	  redirect_edge_pred (single_succ_edge (preheader),
			      single_pred (preheader));
	  delete_basic_block (preheader);
	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
				   loop_preheader_edge (scalar_loop)->src);
	  preheader = split_edge (loop_preheader_edge (loop));
	  entry_e = single_pred_edge (preheader);

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
			  single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
			       loop_preheader_edge (new_loop)->src);

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
	 edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
	   gsi_new = gsi_start_phis (new_loop->header);
	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
	{
	  gphi *orig_phi = gsi_orig.phi ();
	  gphi *new_phi = gsi_new.phi ();
	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
	  location_t orig_locus
	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
	}
    }

  checking_verify_dominators (CDI_DOMINATORS);
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       basic_block guard_to, basic_block dom_bb,
		       profile_probability probability, bool irreducible_p)

  gimple_stmt_iterator gsi;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
				 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.  */
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)

  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies the checking what
   uses should be renamed.  */
create_lcssa_for_virtual_phi (struct loop *loop)

  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))

	gphi *phi = gsi.phi ();
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))

	tree new_vop = copy_ssa_name (PHI_RESULT (phi));
	gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
	tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	imm_use_iterator imm_iter;

	SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
	  = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
	add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	gimple_phi_set_result (new_phi, new_vop);
	FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	  if (stmt != new_phi
	      && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_vop);
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if succeed and NULL if not.  */

find_loop_location (struct loop *loop)

  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
	return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
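
/* For instance (hypothetical GIMPLE, for illustration only), an IV such as

     i_1 = PHI <0(preheader), i_2(latch)>
     i_2 = i_1 + 4;

   has evolution part 4, which is loop-invariant and not itself a chrec, so
   it passes the checks below; an IV whose step varied inside the loop would
   be rejected.  */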
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)

  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip virtual phi's.  The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

	 Skip reduction phis.  */

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n");

      /* Analyze the evolution function.  */

      evolution_part
	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION,
			 "No access function or evolution.\n");
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution not invariant in loop.\n");
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution is chrec.\n");
	}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */
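
/* A hypothetical illustration: for an IV with initial value init_expr and a
   constant step of 4, the new definition emitted in the exit block below is
   simply init_expr + 4 * NITERS, and the PHI argument on UPDATE_E is
   redirected to that name.  */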
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
				  tree niters, edge update_e)

  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))

      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip reduction and virtual phis.  */

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n");

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
	 of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
	gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
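
/* A worked example under assumed numbers (not taken from any real target):
   for a vector of 4 ints with target_align = 16 bytes and elem_size = 4, a
   start address that is misaligned by 8 bytes gives misalign_in_elems =
   8 / 4 = 2 and align_in_elems = 16 / 4 = 4, so the prolog executes
   (4 - 2) & (4 - 1) = 2 iterations, after which DR is 16-byte aligned.  */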
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
			     basic_block bb, int *bound)

  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int target_align = DR_TARGET_ALIGNMENT (dr);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }

      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
							       &stmts, offset);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree target_align_minus_1 = build_int_cst (type, target_align - 1);
      HOST_WIDE_INT elem_size
	= int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      HOST_WIDE_INT align_in_elems = target_align / elem_size;
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);
      tree misalign_in_bytes;
      tree misalign_in_elems;

      /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
      misalign_in_bytes
	= fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
		       target_align_minus_1);

      /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
      misalign_in_elems
	= fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
				 & (align_in_elems - 1)).  */
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
			     align_in_elems_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
			     misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems - 1;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  gimple_seq_add_seq (&stmts, new_stmts);

      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
	gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
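
/* For example (illustrative numbers only): if NITERS = 3 iterations were
   peeled and DR_STEP is 4 bytes, the code below adds 3 * 4 = 12 bytes to
   DR_OFFSET, so DR now describes the first access of the remaining
   iterations.  */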
vect_update_init_of_dr (struct data_reference *dr, tree niters)

  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
			fold_convert (sizetype, niters),
			fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
			fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)

  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);

      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
   it to TRUE if new ssa_var is generated.  */
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)

  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)

      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);

      gsi_insert_seq_on_edge_immediate (pe, stmts);
      if (new_var_p != NULL)
/* Calculate the number of iterations above which vectorized loop will be
   preferred than scalar loop.  NITERS_PROLOG is the number of iterations
   of prolog loop.  If it's integer const, the integer number is also passed
   in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (included) of
   number of iterations of prolog loop.  VFM1 is vector factor minus one.
   If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
   (rather than vectorized) loop will be executed.  This function stores
   upper bound (included) of the result in BOUND_SCALAR.  */
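
/* A small illustration with assumed values: with VFM1 = 7 (so VF = 8),
   NITERS_PROLOG = 3 and BOUND_PROLOG = 7, niters below is computed as
   3 + 7 = 10 and *BOUND_SCALAR starts out as 7 + 7 = 14 when no
   profitability check is requested.  */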
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
			     int bound_prolog, poly_int64 vfm1, int th,
			     poly_uint64 *bound_scalar,
			     bool check_profitability)

  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
			     build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
	 compute the maximum niters of scalar loop.  */

      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
	{
	  *bound_scalar = upper_bound (int_niters_prolog + vfm1, th);
	  return build_int_cst (type, *bound_scalar);
	}
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
	 bound (included) of niters of prolog loop.  */
      if (known_ge (th, vfm1 + bound_prolog))
	{
	  return build_int_cst (type, th);
	}
      /* Need to do runtime comparison.  */
      else if (maybe_gt (th, vfm1))
	{
	  *bound_scalar = upper_bound (*bound_scalar, th);
	  return fold_build2 (MAX_EXPR, type,
			      build_int_cst (type, th), niters);
	}
    }
/* NITERS is the number of times that the original scalar loop executes
   after peeling.  Work out the maximum number of iterations N that can
   be handled by the vectorized form of the loop and then either:

   a) set *STEP_VECTOR_PTR to the vectorization factor and generate:

	niters_vector = N

   b) set *STEP_VECTOR_PTR to one and generate:

	niters_vector = N / vf

   In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
   any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
   is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).  */
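
/* A worked example with assumed values: for NITERS = 17 and a constant
   VF = 8, case (b) computes niters_vector = 17 >> 3 = 2 when NITERS is known
   not to overflow, and ((17 - 8) >> 3) + 1 = 2 otherwise; the vector loop
   then covers 16 scalar iterations and one is left for the epilogue.  */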
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
			     tree *niters_vector_ptr, tree *step_vector_ptr,
			     bool niters_no_overflow)

  tree ni_minus_gap, var;
  tree niters_vector, step_vector, type = TREE_TYPE (niters);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = NULL_TREE;

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
				  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
	{
	  var = create_tmp_var (type, "ni_gap");
	  gimple *stmts = NULL;
	  ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
					       true, var);
	  gsi_insert_seq_on_edge_immediate (pe, stmts);
	}
    }
  else
    ni_minus_gap = niters;

  unsigned HOST_WIDE_INT const_vf;
  if (vf.is_constant (&const_vf))
    {
      /* Create: niters >> log2(vf) */
      /* If it's known that niters == number of latch executions + 1 doesn't
	 overflow, we can generate niters >> log2(vf); otherwise we generate
	 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
	 will be at least one.  */
      log_vf = build_int_cst (type, exact_log2 (const_vf));
      if (niters_no_overflow)
	niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
      else
	niters_vector
	  = fold_build2 (PLUS_EXPR, type,
			 fold_build2 (RSHIFT_EXPR, type,
				      fold_build2 (MINUS_EXPR, type,
						   ni_minus_gap,
						   build_int_cst (type, vf)),
				      log_vf),
			 build_int_cst (type, 1));
      step_vector = build_one_cst (type);
    }
  else
    {
      niters_vector = ni_minus_gap;
      step_vector = build_int_cst (type, vf);
    }

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
	 we set range information to make niters analyzer's life easier.  */
      if (stmts != NULL && log_vf)
	set_range_info (niters_vector, VR_RANGE,
			wi::to_wide (build_int_cst (type, 1)),
			wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
						  TYPE_MAX_VALUE (type),
						  log_vf)));
    }
  *niters_vector_ptr = niters_vector;
  *step_vector_ptr = step_vector;
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
				     tree niters_vector,
				     tree *niters_vector_mult_vf_ptr)

  /* We should be using a step_vector of VF if VF is variable.  */
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
					    niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
						    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       i_1 = PHI<i_0, i_2>;
       ...
     ;; i_x = PHI<i_2>;         ;; LCSSA phi node to be created for FIRST,
       ...
       i_3 = PHI<i_0, i_4>;     ;; Use of i_0 to be replaced with i_x,
				   or with i_2 if no LCSSA phi is created
				   under condition of CREATE_LCSSA_FOR_IV_PHIS.

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing argument on incoming edge with the
   result of newly created lcssa PHI nodes.  IF CREATE_LCSSA_FOR_IV_PHIS
   is false, Loop closed ssa phis will only be created for non-iv phis for
   the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute rest iterations of the first.  */
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
				   struct loop *first, struct loop *second,
				   bool create_lcssa_for_iv_phis)

  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))

      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
	{
	  tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  gphi *lcssa_phi = create_phi_node (new_res, between_bb);
	  add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
	}

      /* Update PHI node in the second loop by replacing arg on the loop's
	 incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   look like:

       i_1 = PHI<i_0, i_2>;
       ...
     ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
       ...
       i_3 = PHI<i_5, i_4>;     ;; Use of i_5 to be replaced with i_x.

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of new PHI result.  */
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
				    struct loop *update_loop,
				    edge guard_edge, edge merge_edge)

  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
	gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))

      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
	 args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
		gphi *lcssa_phi)

  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
			   PHI_ARG_DEF (lcssa_phi, 0), 0))
	return PHI_RESULT (phi);
    }
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.

       i_1 = PHI<i_0, i_2>;
       ...
       i_3 = PHI<i_2, i_4>;
       ...
     ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
       ...
       i_x = PHI<i_4>;          ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used out side EPILOG (i.e - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
				    edge guard_edge, edge merge_edge)

  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))

      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
	 loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)

      tree merge_arg = get_current_def (old_arg);
	merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
	 use the old arg.  */
	guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
	 the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)

  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       loop:
	 ...
	 if (exit_loop_cond) goto exit_bb
	 else                goto loop

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
     CHECK_PROFITABILITY is true.

   Output:
   - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
     iterate after vectorization; see slpeel_make_loop_iterate_ntimes
     for details.
   - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
     should be set to the number of scalar iterations handled by the
     vector loop.  The SSA name is only used on exit from the loop.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   looks like:

     guard_bb_1:
       if (prefer_scalar_loop) goto merge_bb_1
       else                    goto guard_bb_2

     guard_bb_2:
       if (skip_prolog) goto merge_bb_2
       else             goto prolog_preheader

     prolog_loop:
       ...
       if (exit_prolog_cond) goto prolog_exit_bb
       else                  goto prolog_header_bb

     vector_loop:
       ...
       if (exit_vector_cond) goto vector_exit_bb
       else                  goto vector_header_bb

     guard_bb_3:
       if (skip_epilog) goto merge_bb_3
       else             goto epilog_preheader

     epilog_loop:
       ...
       if (exit_epilog_cond) goto merge_bb_3
       else                  goto epilog_header_bb

   Note this function peels prolog and epilog only if it's necessary,
   as well as guards.
   Returns created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
1789 vect_do_peeling (loop_vec_info loop_vinfo
, tree niters
, tree nitersm1
,
1790 tree
*niters_vector
, tree
*step_vector
,
1791 tree
*niters_vector_mult_vf_var
, int th
,
1792 bool check_profitability
, bool niters_no_overflow
)
1795 tree type
= TREE_TYPE (niters
), guard_cond
;
1796 basic_block guard_bb
, guard_to
;
1797 profile_probability prob_prolog
, prob_vector
, prob_epilog
;
1798 int bound_prolog
= 0;
1799 poly_uint64 bound_scalar
= 0;
1801 int prolog_peeling
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
1802 bool epilog_peeling
= (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo
)
1803 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
));
1805 if (!prolog_peeling
&& !epilog_peeling
)
1808 prob_vector
= profile_probability::guessed_always ().apply_scale (9, 10);
1809 estimated_vf
= vect_vf_for_cost (loop_vinfo
);
1810 if (estimated_vf
== 2)
1812 prob_prolog
= prob_epilog
= profile_probability::guessed_always ()
1813 .apply_scale (estimated_vf
- 1, estimated_vf
);
1814 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1816 struct loop
*prolog
, *epilog
= NULL
, *loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1817 struct loop
*first_loop
= loop
;
1818 bool irred_flag
= loop_preheader_edge (loop
)->flags
& EDGE_IRREDUCIBLE_LOOP
;
1819 create_lcssa_for_virtual_phi (loop
);
1820 update_ssa (TODO_update_ssa_only_virtuals
);
1822 if (MAY_HAVE_DEBUG_BIND_STMTS
)
1824 gcc_assert (!adjust_vec
.exists ());
1825 adjust_vec
.create (32);
1827 initialize_original_copy_tables ();
1829 /* Prolog loop may be skipped. */
1830 bool skip_prolog
= (prolog_peeling
!= 0);
1831 /* Skip to epilog if scalar loop may be preferred. It's only needed
1832 when we peel for epilog loop and when it hasn't been checked with
1834 bool skip_vector
= ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1835 && !LOOP_REQUIRES_VERSIONING (loop_vinfo
))
1836 || !vf
.is_constant ());
1837 /* Epilog loop must be executed if the number of iterations for epilog
1838 loop is known at compile time, otherwise we need to add a check at
1839 the end of vector loop and skip to the end of epilog loop. */
1840 bool skip_epilog
= (prolog_peeling
< 0
1841 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1842 || !vf
.is_constant ());
1843 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1844 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
))
1845 skip_epilog
= false;
1847 /* Record the anchor bb at which guard should be placed if scalar loop
1848 may be preferred. */
1849 basic_block anchor
= loop_preheader_edge (loop
)->src
;
1852 split_edge (loop_preheader_edge (loop
));
1854 /* Due to the order in which we peel prolog and epilog, we first
1855 propagate probability to the whole loop. The purpose is to
1856 avoid adjusting probabilities of both prolog and vector loops
1857 separately. Note in this case, the probability of epilog loop
1858 needs to be scaled back later. */
1859 basic_block bb_before_loop
= loop_preheader_edge (loop
)->src
;
1860 if (prob_vector
.initialized_p ())
1862 scale_bbs_frequencies (&bb_before_loop
, 1, prob_vector
);
1863 scale_loop_profile (loop
, prob_vector
, 0);
1867 tree niters_prolog
= build_int_cst (type
, 0);
1868 source_location loop_loc
= find_loop_location (loop
);
1869 struct loop
*scalar_loop
= LOOP_VINFO_SCALAR_LOOP (loop_vinfo
);
1872 e
= loop_preheader_edge (loop
);
1873 if (!slpeel_can_duplicate_loop_p (loop
, e
))
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1876 "loop can't be duplicated to preheader edge.\n");
1879 /* Peel prolog and put it on preheader edge of loop. */
1880 prolog
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, scalar_loop
, e
);
1883 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1884 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1887 slpeel_update_phi_nodes_for_loops (loop_vinfo
, prolog
, loop
, true);
1888 first_loop
= prolog
;
1889 reset_original_copy_tables ();
1891 /* Generate and update the number of iterations for prolog loop. */
1892 niters_prolog
= vect_gen_prolog_loop_niters (loop_vinfo
, anchor
,
1894 tree step_prolog
= build_one_cst (TREE_TYPE (niters_prolog
));
1895 slpeel_make_loop_iterate_ntimes (prolog
, niters_prolog
, step_prolog
,
1898 /* Skip the prolog loop. */
1901 guard_cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1902 niters_prolog
, build_int_cst (type
, 0));
1903 guard_bb
= loop_preheader_edge (prolog
)->src
;
1904 basic_block bb_after_prolog
= loop_preheader_edge (loop
)->src
;
1905 guard_to
= split_edge (loop_preheader_edge (loop
));
1906 guard_e
= slpeel_add_loop_guard (guard_bb
, guard_cond
,
1908 prob_prolog
.invert (),
1910 e
= EDGE_PRED (guard_to
, 0);
1911 e
= (e
!= guard_e
? e
: EDGE_PRED (guard_to
, 1));
1912 slpeel_update_phi_nodes_for_guard1 (prolog
, loop
, guard_e
, e
);
1914 scale_bbs_frequencies (&bb_after_prolog
, 1, prob_prolog
);
1915 scale_loop_profile (prolog
, prob_prolog
, bound_prolog
);
      /* Update init address of DRs.  */
      vect_update_inits_of_drs (loop_vinfo, niters_prolog);
      /* Update niters for vector loop.  */
      LOOP_VINFO_NITERS (loop_vinfo)
	= fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
      LOOP_VINFO_NITERSM1 (loop_vinfo)
	= fold_build2 (MINUS_EXPR, type,
		       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
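      /* Worked example (illustration only): if the original NITERS is 103
	 and the prolog peels niters_prolog = 3 iterations for alignment,
	 the vector loop is left with 103 - 3 = 100 iterations and
	 NITERSM1 becomes 102 - 3 = 99.  */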
      bool new_var_p = false;
      niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
      /* It's guaranteed that vector loop bound before vectorization is at
	 least VF, so set range information for newly generated var.  */
      if (new_var_p)
	set_range_info (niters, VR_RANGE,
			wi::to_wide (build_int_cst (type, vf)),
			wi::to_wide (TYPE_MAX_VALUE (type)));

      /* Prolog iterates at most bound_prolog times, latch iterates at
	 most bound_prolog - 1 times.  */
      record_niter_bound (prolog, bound_prolog - 1, false, true);
      delete_update_ssa ();
      adjust_vec_debug_stmts ();
    }

  if (epilog_peeling)
    {
      e = single_exit (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "loop can't be duplicated to exit edge.\n");
	  gcc_unreachable ();
	}
      /* Peel epilog and put it on exit edge of loop.  */
      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!epilog)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
	  gcc_unreachable ();
	}
      slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);

      /* Scalar version loop may be preferred.  In this case, add guard
	 and skip to epilog.  Note this only happens when the number of
	 iterations of loop is unknown at compile time, otherwise this
	 won't be vectorized.  */
      if (skip_vector)
	{
	  /* Additional epilogue iteration is peeled if gap exists.  */
	  bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
	  tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
						bound_prolog,
						peel_for_gaps ? vf : vf - 1,
						th, &bound_scalar,
						check_profitability);
	  /* Build guard against NITERSM1 since NITERS may overflow.  */
	  guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
	  guard_bb = anchor;
	  guard_to = split_edge (loop_preheader_edge (epilog));
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
					   guard_to, guard_bb,
					   prob_vector.invert (),
					   irred_flag);
	  e = EDGE_PRED (guard_to, 0);
	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
	  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
	  /* Simply propagate profile info from guard_bb to guard_to which is
	     a merge point of control flow.  */
	  guard_to->count = guard_bb->count;

	  /* Scale probability of epilog loop back.
	     FIXME: We should avoid scaling down and back up.  Profile may
	     get lost if we scale down to 0.  */
	  basic_block *bbs = get_loop_body (epilog);
	  for (unsigned int i = 0; i < epilog->num_nodes; i++)
	    bbs[i]->count = bbs[i]->count.apply_scale
				 (bbs[i]->count,
				  bbs[i]->count.apply_probability
				    (prob_vector));
	  free (bbs);
	}
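      /* Illustration of the intended arithmetic (not original commentary):
	 the loop above effectively divides each epilog block count by
	 prob_vector again, since for a count C

	   C.apply_scale (C, C.apply_probability (prob_vector))
	     = C * C / (C * prob_vector) = C / prob_vector,

	 which undoes the earlier whole-loop scaling for this copy.  */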
      basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
      tree niters_vector_mult_vf;
      /* If loop is peeled for non-zero constant times, now niters refers to
	 orig_niters - prolog_peeling, it won't overflow even if the
	 orig_niters overflows.  */
      niters_no_overflow |= (prolog_peeling > 0);
      vect_gen_vector_loop_niters (loop_vinfo, niters,
				   niters_vector, step_vector,
				   niters_no_overflow);
      if (!integer_onep (*step_vector))
	{
	  /* On exit from the loop we will have an easy way of calculating
	     NITERS_VECTOR / STEP * STEP.  Install a dummy definition
	     until then.  */
	  niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
	  SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
	  *niters_vector_mult_vf_var = niters_vector_mult_vf;
	}
      else
	vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
					     &niters_vector_mult_vf);
      /* Update IVs of original loop as if they were advanced by
	 niters_vector_mult_vf steps.  */
      gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
      edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
      vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
					update_e);
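      /* Conceptual example (not from the original code): if a scalar IV
	 starts at 0 and the vector loop covers niters_vector_mult_vf = 96
	 scalar iterations' worth of work, every use of the IV reached via
	 UPDATE_E must see the value the IV would have had after 96 scalar
	 iterations; the call above rewrites the exit PHIs accordingly.  */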
      if (skip_epilog)
	{
	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
				    niters, niters_vector_mult_vf);
	  guard_bb = single_exit (loop)->dest;
	  guard_to = split_edge (single_exit (epilog));
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
					   skip_vector ? anchor : guard_bb,
					   prob_epilog.invert (),
					   irred_flag);
	  slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
					      single_exit (epilog));
	  /* Only need to handle basic block before epilog loop if it's not
	     the guard_bb, which is the case when skip_vector is true.  */
	  if (guard_bb != bb_before_epilog)
	    {
	      prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();

	      scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
	    }
	  scale_loop_profile (epilog, prob_epilog, 0);
	}
      else
	slpeel_update_phi_nodes_for_lcssa (epilog);
      unsigned HOST_WIDE_INT bound1, bound2;
      if (vf.is_constant (&bound1) && bound_scalar.is_constant (&bound2))
	{
	  bound1 -= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 2;
	  if (bound2)
	    /* We share epilog loop with scalar version loop.  */
	    bound1 = MAX (bound1, bound2 - 1);
	  record_niter_bound (epilog, bound1, false, true);
	}

      delete_update_ssa ();
      adjust_vec_debug_stmts ();
    }
  adjust_vec.release ();
  free_original_copy_tables ();

  return epilog;
}
/* Function vect_create_cond_for_niters_checks.

   Create a conditional expression that represents the run-time checks for
   loop's niter.  The loop is guaranteed to terminate if the run-time
   checks hold.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
		assumptions to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the
   if statement that controls which version of the loop gets executed at
   runtime.  */

static void
vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
{
  tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);

  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}
/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
   and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */

static void
chain_cond_expr (tree *cond_expr, tree part_cond_expr)
{
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment
   must be checked at runtime.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
		LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
		LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
			 expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero, and this test can be
	done as a&(n-1) == 0.  For example, for 16
	byte vectors the test is a&0xf == 0.  */
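/* Worked example (illustration, not part of the original comment): for
   16-byte vectors the mask is 0xf.  Given data-reference addresses
   0x1000 and 0x1008, the generated test computes
   (0x1000 | 0x1008) & 0xf == 0x8, which is nonzero, so the check fails
   and the scalar version runs; with addresses 0x1000 and 0x1010 the OR
   is 0x1010 and 0x1010 & 0xf == 0, so the vectorized version runs.  */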
static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
				   tree *cond_expr,
				   gimple_seq *cond_expr_stmt_list)
{
  vec<gimple *> may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  gimple *ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple *and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask + 1)) == 0));

  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
     of the first vector of the i'th data reference.  */

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree new_or_tmp_name;
      gimple *addr_stmt, *or_stmt;
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      bool negative = tree_int_cst_compare
	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
      tree offset = negative
	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
					      offset);
      if (new_stmt_list != NULL)
	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      /* The addresses are ORed together.  */
      if (or_tmp_name != NULL_TREE)
	{
	  /* create: or_tmp = or_tmp | addr_tmp */
	  sprintf (tmp_name, "orptrs%d", i);
	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type,
						NULL, tmp_name);
	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
					 or_tmp_name, addr_tmp_name);
	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
	  or_tmp_name = new_or_tmp_name;
	}
      else
	or_tmp_name = addr_tmp_name;
    }

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
				  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
				and_tmp_name, ptrsize_zero);
  chain_cond_expr (cond_expr, part_cond_expr);
}
/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
   create a tree representation of:  (&A1 != &B1) && ... && (&An != &Bn).
   Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
   and this new condition are true.  Treat a null *COND_EXPR as "true".  */
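/* For example (illustration only), with pairs <a, b> and <c, d> the
   condition built below is the tree form of

     &a != &b && &c != &d

   chained onto whatever checks *COND_EXPR already holds via
   chain_cond_expr.  */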
static void
vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
{
  vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  unsigned int i;
  vec_object_pair *pair;
  FOR_EACH_VEC_ELT (pairs, i, pair)
    {
      tree addr1 = build_fold_addr_expr (pair->first);
      tree addr2 = build_fold_addr_expr (pair->second);
      tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
					 addr1, addr2);
      chain_cond_expr (cond_expr, part_cond_expr);
    }
}
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.  If it is NULL, then the function
		is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
		to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */

void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);

  if (comp_alias_ddrs.is_empty ())
    return;

  create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
			       &comp_alias_ddrs, cond_expr);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created %u versioning for alias checks.\n",
		     comp_alias_ddrs.length ());
}
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and alignment tests pass.

   The test generated to check which version of the loop is executed
   is modified to also check for profitability as indicated by the
   cost model threshold TH.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  */
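/* Sketch of the run-time structure this function sets up (illustration
   only):

     if (profitability threshold && niter assumptions
	 && alignment checks && alias checks)
       <loop to be vectorized>
     else
       <scalar fallback copy>

   where each sub-condition is emitted only when the corresponding
   version_niter / version_align / version_alias flag (or the
   profitability check) applies.  */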
void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability,
		      poly_uint64 versioning_threshold)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  profile_probability prob = profile_probability::likely ();
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
  bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);

  if (check_profitability)
    cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
			     build_int_cst (TREE_TYPE (scalar_loop_iters),
					    th - 1));
  if (maybe_ne (versioning_threshold, 0U))
    {
      tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
			       build_int_cst (TREE_TYPE (scalar_loop_iters),
					      versioning_threshold - 1));
      if (cond_expr)
	cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
				 expr, cond_expr);
      else
	cond_expr = expr;
    }

  if (version_niter)
    vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);

  if (cond_expr)
    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					is_gimple_condexpr, NULL_TREE);

  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (version_alias)
    {
      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
    }

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
	 scale LOOP's frequencies instead.  */
      nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
			    prob, prob.invert (), prob, prob.invert (), true);
      scale_loop_frequencies (loop, prob);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
	 while we need to move it above LOOP's preheader.  */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
		  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
		  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
				      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
			       single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
			       single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
			       single_pred (preheader));
    }
  else
    nloop = loop_version (loop, cond_expr, &condition_bb,
			  prob, prob.invert (), prob, prob.invert (), true);
  if (version_niter)
    {
      /* The versioned loop could be infinite, we need to clear existing
	 niter information which is copied from the original loop.  */
      gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
      vect_free_loop_info_assumptions (nloop);
      /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
      loop_constraint_set (loop, LOOP_C_INFINITE);
    }

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization because of "
			 "possible aliasing\n");
      if (version_align)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization to enhance "
			 "alignment\n");
    }
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from loop, but scalar_loop instead,
     merge_bb will already have just a single predecessor.  */

  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree new_res;
	  orig_phi = gsi.phi ();
	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  new_phi = create_phi_node (new_res, new_exit_bb);
	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  add_phi_arg (new_phi, arg, new_exit_e,
		       gimple_phi_arg_location_from_edge (orig_phi, e));
	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
	}
    }

  /* End loop-exit-fixes after versioning.  */

  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}