poly_int: GET_MODE_NUNITS
[official-gcc.git] / gcc / tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
44 #include "gimple-fold.h"
46 /*************************************************************************
47 Simple Loop Peeling Utilities
49 Utilities to support loop peeling for vectorization purposes.
50 *************************************************************************/
53 /* Renames the use *OP_P. */
55 static void
56 rename_use_op (use_operand_p op_p)
58 tree new_name;
60 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
61 return;
63 new_name = get_current_def (USE_FROM_PTR (op_p));
65 /* Something defined outside of the loop. */
66 if (!new_name)
67 return;
69 /* An ordinary ssa name defined in the loop. */
71 SET_USE (op_p, new_name);
75 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
76 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
77 true. */
79 static void
80 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
82 gimple *stmt;
83 use_operand_p use_p;
84 ssa_op_iter iter;
85 edge e;
86 edge_iterator ei;
87 struct loop *loop = bb->loop_father;
88 struct loop *outer_loop = NULL;
90 if (rename_from_outer_loop)
92 gcc_assert (loop);
93 outer_loop = loop_outer (loop);
96 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
97 gsi_next (&gsi))
99 stmt = gsi_stmt (gsi);
100 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
101 rename_use_op (use_p);
104 FOR_EACH_EDGE (e, ei, bb->preds)
106 if (!flow_bb_inside_loop_p (loop, e->src))
108 if (!rename_from_outer_loop)
109 continue;
110 if (e->src != outer_loop->header)
112 if (outer_loop->inner->next)
114 /* If outer_loop has 2 inner loops, allow there to
115 be an extra basic block which decides which of the
116 two loops to use using LOOP_VECTORIZED. */
117 if (!single_pred_p (e->src)
118 || single_pred (e->src) != outer_loop->header)
119 continue;
123 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
124 gsi_next (&gsi))
125 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
130 struct adjust_info
132 tree from, to;
133 basic_block bb;
136 /* A stack of values to be adjusted in debug stmts. We have to
137 process them LIFO, so that the closest substitution applies. If we
138 processed them FIFO, without the stack, we might substitute uses
139 with a PHI DEF that would soon become non-dominant, and when we got
140 to the suitable one, it wouldn't have anything to substitute any
141 more. */
142 static vec<adjust_info, va_heap> adjust_vec;
144 /* Adjust any debug stmts that referenced AI->from values to use the
145 loop-closed AI->to, if the references are dominated by AI->bb and
146 not by the definition of AI->from. */
148 static void
149 adjust_debug_stmts_now (adjust_info *ai)
151 basic_block bbphi = ai->bb;
152 tree orig_def = ai->from;
153 tree new_def = ai->to;
154 imm_use_iterator imm_iter;
155 gimple *stmt;
156 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
158 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
160 /* Adjust any debug stmts that held onto non-loop-closed
161 references. */
162 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
164 use_operand_p use_p;
165 basic_block bbuse;
167 if (!is_gimple_debug (stmt))
168 continue;
170 gcc_assert (gimple_debug_bind_p (stmt));
172 bbuse = gimple_bb (stmt);
174 if ((bbuse == bbphi
175 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
176 && !(bbuse == bbdef
177 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
179 if (new_def)
180 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
181 SET_USE (use_p, new_def);
182 else
184 gimple_debug_bind_reset_value (stmt);
185 update_stmt (stmt);
191 /* Adjust debug stmts as scheduled before. */
193 static void
194 adjust_vec_debug_stmts (void)
196 if (!MAY_HAVE_DEBUG_BIND_STMTS)
197 return;
199 gcc_assert (adjust_vec.exists ());
201 while (!adjust_vec.is_empty ())
203 adjust_debug_stmts_now (&adjust_vec.last ());
204 adjust_vec.pop ();
208 /* Adjust any debug stmts that referenced FROM values to use the
209 loop-closed TO, if the references are dominated by BB and not by
210 the definition of FROM. If adjust_vec is non-NULL, adjustments
211 will be postponed until adjust_vec_debug_stmts is called. */
213 static void
214 adjust_debug_stmts (tree from, tree to, basic_block bb)
216 adjust_info ai;
218 if (MAY_HAVE_DEBUG_BIND_STMTS
219 && TREE_CODE (from) == SSA_NAME
220 && ! SSA_NAME_IS_DEFAULT_DEF (from)
221 && ! virtual_operand_p (from))
223 ai.from = from;
224 ai.to = to;
225 ai.bb = bb;
227 if (adjust_vec.exists ())
228 adjust_vec.safe_push (ai);
229 else
230 adjust_debug_stmts_now (&ai);
234 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
235 to adjust any debug stmts that referenced the old phi arg,
236 presumably non-loop-closed references left over from other
237 transformations. */
239 static void
240 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
242 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
244 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
246 if (MAY_HAVE_DEBUG_BIND_STMTS)
247 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
248 gimple_bb (update_phi));
251 /* Make LOOP iterate N == (NITERS - STEP) / STEP + 1 times,
252 where NITERS is known to be outside the range [1, STEP - 1].
253 This is equivalent to making the loop execute NITERS / STEP
254 times when NITERS is nonzero and (1 << M) / STEP times otherwise,
255 where M is the precision of NITERS.
257 NITERS_MAYBE_ZERO is true if NITERS can be zero, and false if it is known
258 to be >= STEP. In the latter case N is always NITERS / STEP.
260 If FINAL_IV is nonnull, it is an SSA name that should be set to
261 N * STEP on exit from the loop.
263 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
265 void
266 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters, tree step,
267 tree final_iv, bool niters_maybe_zero)
269 tree indx_before_incr, indx_after_incr;
270 gcond *cond_stmt;
271 gcond *orig_cond;
272 edge pe = loop_preheader_edge (loop);
273 edge exit_edge = single_exit (loop);
274 gimple_stmt_iterator loop_cond_gsi;
275 gimple_stmt_iterator incr_gsi;
276 bool insert_after;
277 source_location loop_loc;
278 enum tree_code code;
279 tree niters_type = TREE_TYPE (niters);
281 orig_cond = get_loop_exit_condition (loop);
282 gcc_assert (orig_cond);
283 loop_cond_gsi = gsi_for_stmt (orig_cond);
285 tree init, limit;
286 if (!niters_maybe_zero && integer_onep (step))
288 /* In this case we can use a simple 0-based IV:
291 x = 0;
295 x += 1;
297 while (x < NITERS); */
298 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
299 init = build_zero_cst (niters_type);
300 limit = niters;
302 else
304 /* The following works for all values of NITERS except 0:
307 x = 0;
311 x += STEP;
313 while (x <= NITERS - STEP);
315 so that the loop continues to iterate if x + STEP - 1 < NITERS
316 but stops if x + STEP - 1 >= NITERS.
318 However, if NITERS is zero, x never hits a value above NITERS - STEP
319 before wrapping around. There are two obvious ways of dealing with
320 this:
322 - start at STEP - 1 and compare x before incrementing it
323 - start at -1 and compare x after incrementing it
325 The latter is simpler and is what we use. The loop in this case
326 looks like:
329 x = -1;
333 x += STEP;
335 while (x < NITERS - STEP);
337 In both cases the loop limit is NITERS - STEP. */
338 gimple_seq seq = NULL;
339 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
340 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
341 if (seq)
343 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
344 gcc_assert (!new_bb);
346 if (niters_maybe_zero)
348 /* Case C. */
349 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
350 init = build_all_ones_cst (niters_type);
352 else
354 /* Case B. */
355 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
356 init = build_zero_cst (niters_type);
360 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
361 create_iv (init, step, NULL_TREE, loop,
362 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
364 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
365 true, NULL_TREE, true,
366 GSI_SAME_STMT);
367 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
368 true, GSI_SAME_STMT);
370 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
371 NULL_TREE);
373 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
375 /* Remove old loop exit test: */
376 gsi_remove (&loop_cond_gsi, true);
377 free_stmt_vec_info (orig_cond);
379 loop_loc = find_loop_location (loop);
380 if (dump_enabled_p ())
382 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
383 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
384 LOCATION_LINE (loop_loc));
385 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
388 /* Record the number of latch iterations. */
389 if (limit == niters)
390 /* Case A: the loop iterates NITERS times. Subtract one to get the
391 latch count. */
392 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
393 build_int_cst (niters_type, 1));
394 else
395 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
396 Subtract one from this to get the latch count. */
397 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
398 limit, step);
400 if (final_iv)
402 gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
403 indx_after_incr, init);
404 gsi_insert_on_edge_immediate (single_exit (loop), assign);
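/* A minimal standalone sketch (not taken from GCC itself) of the
   iteration-count arithmetic documented above.  The helper name and the
   32-bit unsigned type are assumptions made purely for illustration:

     #include <assert.h>
     #include <stdint.h>

     // N == (NITERS - STEP) / STEP + 1, assuming NITERS is outside
     // the range [1, STEP - 1].
     static uint32_t
     latch_count_plus_one (uint32_t niters, uint32_t step)
     {
       return (niters - step) / step + 1;
     }

     int
     main (void)
     {
       // Case A (STEP == 1): the loop runs exactly NITERS times.
       assert (latch_count_plus_one (10, 1) == 10);
       // Case B (NITERS >= STEP): equivalent to truncating NITERS / STEP.
       assert (latch_count_plus_one (10, 4) == 10 / 4);
       // Case C (NITERS == 0): the subtraction wraps, giving
       // (1 << 32) / STEP iterations, e.g. 0x40000000 for STEP == 4.
       assert (latch_count_plus_one (0, 4) == 0x40000000u);
       return 0;
     }  */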
408 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
409 For each PHI argument in FROM->dest and TO->dest coming in on those
410 edges, ensure that the current_def of the TO->dest argument is set
411 to the current_def of the corresponding FROM->dest argument. */
413 static void
414 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
416 gimple_stmt_iterator gsi_from, gsi_to;
418 for (gsi_from = gsi_start_phis (from->dest),
419 gsi_to = gsi_start_phis (to->dest);
420 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
422 gimple *from_phi = gsi_stmt (gsi_from);
423 gimple *to_phi = gsi_stmt (gsi_to);
424 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
425 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
426 if (virtual_operand_p (from_arg))
428 gsi_next (&gsi_from);
429 continue;
431 if (virtual_operand_p (to_arg))
433 gsi_next (&gsi_to);
434 continue;
436 if (TREE_CODE (from_arg) != SSA_NAME)
437 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
438 else
440 if (get_current_def (to_arg) == NULL_TREE)
441 set_current_def (to_arg, get_current_def (from_arg));
443 gsi_next (&gsi_from);
444 gsi_next (&gsi_to);
447 gphi *from_phi = get_virtual_phi (from->dest);
448 gphi *to_phi = get_virtual_phi (to->dest);
449 if (from_phi)
450 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
451 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
455 /* Given LOOP this function generates a new copy of it and puts it
456 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
457 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
458 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
459 entry or exit of LOOP. */
461 struct loop *
462 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
463 struct loop *scalar_loop, edge e)
465 struct loop *new_loop;
466 basic_block *new_bbs, *bbs, *pbbs;
467 bool at_exit;
468 bool was_imm_dom;
469 basic_block exit_dest;
470 edge exit, new_exit;
471 bool duplicate_outer_loop = false;
473 exit = single_exit (loop);
474 at_exit = (e == exit);
475 if (!at_exit && e != loop_preheader_edge (loop))
476 return NULL;
478 if (scalar_loop == NULL)
479 scalar_loop = loop;
481 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
482 pbbs = bbs + 1;
483 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
484 /* Allow duplication of outer loops. */
485 if (scalar_loop->inner)
486 duplicate_outer_loop = true;
487 /* Check whether duplication is possible. */
488 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
490 free (bbs);
491 return NULL;
494 /* Generate new loop structure. */
495 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
496 duplicate_subloops (scalar_loop, new_loop);
498 exit_dest = exit->dest;
499 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
500 exit_dest) == loop->header ?
501 true : false);
503 /* Also copy the pre-header, this avoids jumping through hoops to
504 duplicate the loop entry PHI arguments. Create an empty
505 pre-header unconditionally for this. */
506 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
507 edge entry_e = single_pred_edge (preheader);
508 bbs[0] = preheader;
509 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
511 exit = single_exit (scalar_loop);
512 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
513 &exit, 1, &new_exit, NULL,
514 at_exit ? loop->latch : e->src, true);
515 exit = single_exit (loop);
516 basic_block new_preheader = new_bbs[0];
518 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
520 if (scalar_loop != loop)
522 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
523 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
524 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
525 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
526 header) to have current_def set, so copy them over. */
527 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
528 exit);
529 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch, 0),
531 EDGE_SUCC (loop->latch, 0));
534 if (at_exit) /* Add the loop copy at exit. */
536 if (scalar_loop != loop)
538 gphi_iterator gsi;
539 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
541 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
542 gsi_next (&gsi))
544 gphi *phi = gsi.phi ();
545 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
546 location_t orig_locus
547 = gimple_phi_arg_location_from_edge (phi, e);
549 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
552 redirect_edge_and_branch_force (e, new_preheader);
553 flush_pending_stmts (e);
554 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
555 if (was_imm_dom || duplicate_outer_loop)
556 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
558 /* And remove the unnecessary forwarder again. Keep the other
559 one so we have a proper pre-header for the loop at the exit edge. */
560 redirect_edge_pred (single_succ_edge (preheader),
561 single_pred (preheader));
562 delete_basic_block (preheader);
563 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
564 loop_preheader_edge (scalar_loop)->src);
566 else /* Add the copy at entry. */
568 if (scalar_loop != loop)
570 /* Remove the unnecessary forwarder of scalar_loop again. */
571 redirect_edge_pred (single_succ_edge (preheader),
572 single_pred (preheader));
573 delete_basic_block (preheader);
574 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
575 loop_preheader_edge (scalar_loop)->src);
576 preheader = split_edge (loop_preheader_edge (loop));
577 entry_e = single_pred_edge (preheader);
580 redirect_edge_and_branch_force (entry_e, new_preheader);
581 flush_pending_stmts (entry_e);
582 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
584 redirect_edge_and_branch_force (new_exit, preheader);
585 flush_pending_stmts (new_exit);
586 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
588 /* And remove the unnecessary forwarder again. Keep the other
589 one so we have a proper pre-header for the loop at the exit edge. */
590 redirect_edge_pred (single_succ_edge (new_preheader),
591 single_pred (new_preheader));
592 delete_basic_block (new_preheader);
593 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
594 loop_preheader_edge (new_loop)->src);
597 /* Skip new preheader since it's deleted if copy loop is added at entry. */
598 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
599 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
601 if (scalar_loop != loop)
603 /* Update new_loop->header PHIs, so that on the preheader
604 edge they are the ones from loop rather than scalar_loop. */
605 gphi_iterator gsi_orig, gsi_new;
606 edge orig_e = loop_preheader_edge (loop);
607 edge new_e = loop_preheader_edge (new_loop);
609 for (gsi_orig = gsi_start_phis (loop->header),
610 gsi_new = gsi_start_phis (new_loop->header);
611 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
612 gsi_next (&gsi_orig), gsi_next (&gsi_new))
614 gphi *orig_phi = gsi_orig.phi ();
615 gphi *new_phi = gsi_new.phi ();
616 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
617 location_t orig_locus
618 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
620 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
624 free (new_bbs);
625 free (bbs);
627 checking_verify_dominators (CDI_DOMINATORS);
629 return new_loop;
633 /* Given the condition expression COND, put it as the last statement of
634 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
635 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
636 skip the loop. PROBABILITY is the skip edge's probability. Mark the
637 new edge as irreducible if IRREDUCIBLE_P is true. */
639 static edge
640 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
641 basic_block guard_to, basic_block dom_bb,
642 profile_probability probability, bool irreducible_p)
644 gimple_stmt_iterator gsi;
645 edge new_e, enter_e;
646 gcond *cond_stmt;
647 gimple_seq gimplify_stmt_list = NULL;
649 enter_e = EDGE_SUCC (guard_bb, 0);
650 enter_e->flags &= ~EDGE_FALLTHRU;
651 enter_e->flags |= EDGE_FALSE_VALUE;
652 gsi = gsi_last_bb (guard_bb);
654 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
655 NULL_TREE);
656 if (gimplify_stmt_list)
657 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
659 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
660 gsi = gsi_last_bb (guard_bb);
661 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
663 /* Add new edge to connect guard block to the merge/loop-exit block. */
664 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
666 new_e->probability = probability;
667 if (irreducible_p)
668 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
670 enter_e->probability = probability.invert ();
671 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
673 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
674 if (enter_e->dest->loop_father->header == enter_e->dest)
675 split_edge (enter_e);
677 return new_e;
681 /* This function verifies that the following restrictions apply to LOOP:
682 (1) it consists of exactly 2 basic blocks - a header and an empty latch -
683 for an innermost loop, or of 5 basic blocks for an outer loop
684 (2) it is single entry, single exit
685 (3) its exit condition is the last stmt in the header
686 (4) E is the entry/exit edge of LOOP. */
689 bool
690 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
692 edge exit_e = single_exit (loop);
693 edge entry_e = loop_preheader_edge (loop);
694 gcond *orig_cond = get_loop_exit_condition (loop);
695 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
696 unsigned int num_bb = loop->inner? 5 : 2;
698 /* All loops have an outer scope; the only case loop->outer is NULL is for
699 the function itself. */
700 if (!loop_outer (loop)
701 || loop->num_nodes != num_bb
702 || !empty_block_p (loop->latch)
703 || !single_exit (loop)
704 /* Verify that new loop exit condition can be trivially modified. */
705 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
706 || (e != exit_e && e != entry_e))
707 return false;
709 return true;
712 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
713 in the exit bb and rename all the uses after the loop. This simplifies
714 the *guard[12] routines, which assume loop closed SSA form for all PHIs
715 (but normally loop closed SSA form doesn't require virtual PHIs to be
716 in the same form). Doing this early simplifies checking which
717 uses should be renamed. */
719 static void
720 create_lcssa_for_virtual_phi (struct loop *loop)
722 gphi_iterator gsi;
723 edge exit_e = single_exit (loop);
725 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
726 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
728 gphi *phi = gsi.phi ();
729 for (gsi = gsi_start_phis (exit_e->dest);
730 !gsi_end_p (gsi); gsi_next (&gsi))
731 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
732 break;
733 if (gsi_end_p (gsi))
735 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
736 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
737 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
738 imm_use_iterator imm_iter;
739 gimple *stmt;
740 use_operand_p use_p;
742 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
743 = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
744 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
745 gimple_phi_set_result (new_phi, new_vop);
746 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
747 if (stmt != new_phi
748 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
749 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
750 SET_USE (use_p, new_vop);
752 break;
757 /* Function find_loop_location.
759 Extract the location of the loop in the source code.
760 If the loop is not well formed for vectorization, an estimated
761 location is calculated.
762 Return the loop location on success and UNKNOWN_LOCATION otherwise. */
764 source_location
765 find_loop_location (struct loop *loop)
767 gimple *stmt = NULL;
768 basic_block bb;
769 gimple_stmt_iterator si;
771 if (!loop)
772 return UNKNOWN_LOCATION;
774 stmt = get_loop_exit_condition (loop);
776 if (stmt
777 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
778 return gimple_location (stmt);
780 /* If we got here the loop is probably not "well formed",
781 try to estimate the loop location */
783 if (!loop->header)
784 return UNKNOWN_LOCATION;
786 bb = loop->header;
788 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
790 stmt = gsi_stmt (si);
791 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
792 return gimple_location (stmt);
795 return UNKNOWN_LOCATION;
798 /* Return true if PHI defines an IV of the loop to be vectorized. */
800 static bool
801 iv_phi_p (gphi *phi)
803 if (virtual_operand_p (PHI_RESULT (phi)))
804 return false;
806 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
807 gcc_assert (stmt_info != NULL);
808 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
809 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
810 return false;
812 return true;
815 /* Function vect_can_advance_ivs_p
817 In case the number of iterations of LOOP is unknown at compile
818 time, an epilog loop will be generated, and the loop induction variables
819 (IVs) will be "advanced" to the value they are supposed to take just before
820 the epilog loop. Here we check that the access function of the loop IVs
821 and the expression that represents the loop bound are simple enough.
822 These restrictions will be relaxed in the future. */
824 bool
825 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
828 basic_block bb = loop->header;
829 gphi_iterator gsi;
831 /* Analyze phi functions of the loop header. */
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
835 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
837 tree evolution_part;
839 gphi *phi = gsi.phi ();
840 if (dump_enabled_p ())
842 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
843 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
846 /* Skip virtual phi's. The data dependences that are associated with
847 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
849 Skip reduction phis. */
850 if (!iv_phi_p (phi))
852 if (dump_enabled_p ())
853 dump_printf_loc (MSG_NOTE, vect_location,
854 "reduc or virtual phi. skip.\n");
855 continue;
858 /* Analyze the evolution function. */
860 evolution_part
861 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
862 if (evolution_part == NULL_TREE)
864 if (dump_enabled_p ())
865 dump_printf (MSG_MISSED_OPTIMIZATION,
866 "No access function or evolution.\n");
867 return false;
870 /* FORNOW: We do not transform initial conditions of IVs
871 whose evolution function is not invariant in the loop. */
873 if (!expr_invariant_in_loop_p (loop, evolution_part))
875 if (dump_enabled_p ())
876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
877 "evolution not invariant in loop.\n");
878 return false;
881 /* FORNOW: We do not transform initial conditions of IVs
882 whose evolution function is a polynomial of degree >= 2. */
884 if (tree_is_chrec (evolution_part))
886 if (dump_enabled_p ())
887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
888 "evolution is chrec.\n");
889 return false;
893 return true;
897 /* Function vect_update_ivs_after_vectorizer.
899 "Advance" the induction variables of LOOP to the value they should take
900 after the execution of LOOP. This is currently necessary because the
901 vectorizer does not handle induction variables that are used after the
902 loop. Such a situation occurs when the last iterations of LOOP are
903 peeled, because:
904 1. We introduced new uses after LOOP for IVs that were not originally used
905 after LOOP: the IVs of LOOP are now used by an epilog loop.
906 2. LOOP is going to be vectorized; this means that it will iterate N/VF
907 times, whereas the loop IVs should be bumped N times.
909 Input:
910 - LOOP - a loop that is going to be vectorized. The last few iterations
911 of LOOP were peeled.
912 - NITERS - the number of iterations that LOOP executes (before it is
913 vectorized). i.e, the number of times the ivs should be bumped.
914 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
915 coming out from LOOP on which there are uses of the LOOP ivs
916 (this is the path from LOOP->exit to epilog_loop->preheader).
918 The new definitions of the ivs are placed in LOOP->exit.
919 The phi args associated with the edge UPDATE_E in the bb
920 UPDATE_E->dest are updated accordingly.
922 Assumption 1: Like the rest of the vectorizer, this function assumes
923 a single loop exit that has a single predecessor.
925 Assumption 2: The phi nodes in the LOOP header and in update_bb are
926 organized in the same order.
928 Assumption 3: The access function of the ivs is simple enough (see
929 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
931 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
932 coming out of LOOP on which the ivs of LOOP are used (this is the path
933 that leads to the epilog loop; other paths skip the epilog loop). This
934 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
935 needs to have its phis updated. */
938 static void
939 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
940 tree niters, edge update_e)
942 gphi_iterator gsi, gsi1;
943 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
944 basic_block update_bb = update_e->dest;
945 basic_block exit_bb = single_exit (loop)->dest;
947 /* Make sure there exists a single-predecessor exit bb: */
948 gcc_assert (single_pred_p (exit_bb));
949 gcc_assert (single_succ_edge (exit_bb) == update_e);
951 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
952 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
953 gsi_next (&gsi), gsi_next (&gsi1))
955 tree init_expr;
956 tree step_expr, off;
957 tree type;
958 tree var, ni, ni_name;
959 gimple_stmt_iterator last_gsi;
961 gphi *phi = gsi.phi ();
962 gphi *phi1 = gsi1.phi ();
963 if (dump_enabled_p ())
965 dump_printf_loc (MSG_NOTE, vect_location,
966 "vect_update_ivs_after_vectorizer: phi: ");
967 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
970 /* Skip reduction and virtual phis. */
971 if (!iv_phi_p (phi))
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "reduc or virtual phi. skip.\n");
976 continue;
979 type = TREE_TYPE (gimple_phi_result (phi));
980 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
981 step_expr = unshare_expr (step_expr);
983 /* FORNOW: We do not support IVs whose evolution function is a polynomial
984 of degree >= 2 or exponential. */
985 gcc_assert (!tree_is_chrec (step_expr));
987 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
989 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
990 fold_convert (TREE_TYPE (step_expr), niters),
991 step_expr);
992 if (POINTER_TYPE_P (type))
993 ni = fold_build_pointer_plus (init_expr, off);
994 else
995 ni = fold_build2 (PLUS_EXPR, type,
996 init_expr, fold_convert (type, off));
998 var = create_tmp_var (type, "tmp");
1000 last_gsi = gsi_last_bb (exit_bb);
1001 gimple_seq new_stmts = NULL;
1002 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
1003 /* Exit_bb shouldn't be empty. */
1004 if (!gsi_end_p (last_gsi))
1005 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
1006 else
1007 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
1009 /* Fix phi expressions in the successor bb. */
1010 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
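/* A minimal standalone sketch (not taken from GCC itself): the advance
   computed above is simply init + NITERS * step, evaluated once on the
   exit edge.  The concrete values below are made up for illustration:

     #include <assert.h>

     int
     main (void)
     {
       long init = 5, step = 3, niters = 10;
       // Value the IV reaches after NITERS scalar iterations; this is
       // the value the epilog loop starts from.
       long ni = init + niters * step;
       assert (ni == 35);
       return 0;
     }  */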
1014 /* Function vect_gen_prolog_loop_niters
1016 Generate the number of iterations which should be peeled as prolog for the
1017 loop represented by LOOP_VINFO. It is calculated as the misalignment of
1018 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
1019 As a result, after the execution of this loop, the data reference DR will
1020 refer to an aligned location. The following computation is generated:
1022 If the misalignment of DR is known at compile time:
1023 addr_mis = int mis = DR_MISALIGNMENT (dr);
1024 Else, compute address misalignment in bytes:
1025 addr_mis = addr & (vectype_align - 1)
1027 prolog_niters = ((VF - addr_mis / elem_size) & (VF - 1)) / step
1029 (elem_size = element type size; an element is the scalar element whose type
1030 is the inner type of the vectype)
1032 The computations will be emitted at the end of BB. We also compute and
1033 store upper bound (included) of the result in BOUND.
1035 When the step of the data-ref in the loop is not 1 (as in interleaved data
1036 and SLP), the number of iterations of the prolog must be divided by the step
1037 (which equals the size of the interleaved group).
1039 The above formulas assume that VF == number of elements in the vector. This
1040 may not hold when there are multiple-types in the loop.
1041 In this case, for some data-references in the loop the VF does not represent
1042 the number of elements that fit in the vector. Therefore, instead of VF we
1043 use TYPE_VECTOR_SUBPARTS. */
1045 static tree
1046 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
1047 basic_block bb, int *bound)
1049 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1050 tree var;
1051 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
1052 gimple_seq stmts = NULL, new_stmts = NULL;
1053 tree iters, iters_name;
1054 gimple *dr_stmt = DR_STMT (dr);
1055 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1056 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1057 unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
1059 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1061 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "known peeling = %d.\n", npeel);
1067 iters = build_int_cst (niters_type, npeel);
1068 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1070 else
1072 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1073 tree offset = negative
1074 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1075 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1076 &stmts, offset);
1077 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1078 tree target_align_minus_1 = build_int_cst (type, target_align - 1);
1079 HOST_WIDE_INT elem_size
1080 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1081 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1082 HOST_WIDE_INT align_in_elems = target_align / elem_size;
1083 tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
1084 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
1085 tree misalign_in_bytes;
1086 tree misalign_in_elems;
1088 /* Create: misalign_in_bytes = addr & (target_align - 1). */
1089 misalign_in_bytes
1090 = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1091 target_align_minus_1);
1093 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
1094 misalign_in_elems
1095 = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);
1097 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
1098 & (align_in_elems - 1)). */
1099 if (negative)
1100 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
1101 align_in_elems_tree);
1102 else
1103 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
1104 misalign_in_elems);
1105 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
1106 iters = fold_convert (niters_type, iters);
1107 *bound = align_in_elems - 1;
1110 if (dump_enabled_p ())
1112 dump_printf_loc (MSG_NOTE, vect_location,
1113 "niters for prolog loop: ");
1114 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1115 dump_printf (MSG_NOTE, "\n");
1118 var = create_tmp_var (niters_type, "prolog_loop_niters");
1119 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1121 if (new_stmts)
1122 gimple_seq_add_seq (&stmts, new_stmts);
1123 if (stmts)
1125 gcc_assert (single_succ_p (bb));
1126 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1127 if (gsi_end_p (gsi))
1128 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1129 else
1130 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1132 return iters_name;
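/* A worked standalone instance (not taken from GCC itself) of the runtime
   misalignment computation above.  The 16-byte target alignment, 4-byte
   element size and the start address are example inputs only:

     #include <assert.h>
     #include <stdint.h>

     int
     main (void)
     {
       uintptr_t start_addr = 0x1004;
       uintptr_t target_align = 16, elem_size = 4;
       uintptr_t align_in_elems = target_align / elem_size;
       // misalign_in_bytes = addr & (target_align - 1)
       uintptr_t misalign_in_bytes = start_addr & (target_align - 1);
       // misalign_in_elems = misalign_in_bytes / elem_size
       uintptr_t misalign_in_elems = misalign_in_bytes / elem_size;
       // iters = (align_in_elems - misalign_in_elems) & (align_in_elems - 1)
       uintptr_t iters = (align_in_elems - misalign_in_elems)
                         & (align_in_elems - 1);
       assert (iters == 3);
       // Peeling 3 scalar iterations moves the access to 0x1010,
       // which is 16-byte aligned.
       assert (((start_addr + iters * elem_size) & (target_align - 1)) == 0);
       return 0;
     }  */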
1136 /* Function vect_update_init_of_dr
1138 NITERS iterations were peeled from LOOP. DR represents a data reference
1139 in LOOP. This function updates the information recorded in DR to
1140 account for the fact that the first NITERS iterations had already been
1141 executed. Specifically, it updates the OFFSET field of DR. */
1143 static void
1144 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1146 tree offset = DR_OFFSET (dr);
1148 niters = fold_build2 (MULT_EXPR, sizetype,
1149 fold_convert (sizetype, niters),
1150 fold_convert (sizetype, DR_STEP (dr)));
1151 offset = fold_build2 (PLUS_EXPR, sizetype,
1152 fold_convert (sizetype, offset), niters);
1153 DR_OFFSET (dr) = offset;
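/* A minimal standalone sketch (not taken from GCC itself): the update above
   just adds NITERS * DR_STEP bytes to the recorded offset.  The numbers
   (8-byte step, 3 peeled iterations) are made up for illustration:

     #include <assert.h>

     int
     main (void)
     {
       long offset = 16, step = 8, niters = 3;
       offset = offset + niters * step;
       assert (offset == 40);
       return 0;
     }  */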
1157 /* Function vect_update_inits_of_drs
1159 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1160 This function updates the information recorded for the data references in
1161 the loop to account for the fact that the first NITERS iterations had
1162 already been executed. Specifically, it updates the initial_condition of
1163 the access_function of all the data_references in the loop. */
1165 static void
1166 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1168 unsigned int i;
1169 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1170 struct data_reference *dr;
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "=== vect_update_inits_of_drs ===\n");
1176 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1177 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1179 gimple_seq seq;
1180 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1181 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1183 niters = fold_convert (sizetype, niters);
1184 niters = force_gimple_operand (niters, &seq, false, var);
1185 if (seq)
1187 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1188 gcc_assert (!new_bb);
1192 FOR_EACH_VEC_ELT (datarefs, i, dr)
1193 vect_update_init_of_dr (dr, niters);
1197 /* This function builds ni_name = number of iterations. Statements
1198 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
1199 it to TRUE if new ssa_var is generated. */
1201 tree
1202 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
1204 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1205 if (TREE_CODE (ni) == INTEGER_CST)
1206 return ni;
1207 else
1209 tree ni_name, var;
1210 gimple_seq stmts = NULL;
1211 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1213 var = create_tmp_var (TREE_TYPE (ni), "niters");
1214 ni_name = force_gimple_operand (ni, &stmts, false, var);
1215 if (stmts)
1217 gsi_insert_seq_on_edge_immediate (pe, stmts);
1218 if (new_var_p != NULL)
1219 *new_var_p = true;
1222 return ni_name;
1226 /* Calculate the number of iterations above which the vectorized loop is
1227 preferred over the scalar loop. NITERS_PROLOG is the number of iterations
1228 of prolog loop. If it's integer const, the integer number is also passed
1229 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1230 number of iterations of prolog loop. VFM1 is vector factor minus one.
1231 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1232 (rather than vectorized) loop will be executed. This function stores
1233 upper bound (included) of the result in BOUND_SCALAR. */
1235 static tree
1236 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1237 int bound_prolog, poly_int64 vfm1, int th,
1238 poly_uint64 *bound_scalar,
1239 bool check_profitability)
1241 tree type = TREE_TYPE (niters_prolog);
1242 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1243 build_int_cst (type, vfm1));
1245 *bound_scalar = vfm1 + bound_prolog;
1246 if (check_profitability)
1248 /* TH indicates the minimum niters of vectorized loop, while we
1249 compute the maximum niters of scalar loop. */
1250 th--;
1251 /* Peeling for constant times. */
1252 if (int_niters_prolog >= 0)
1254 *bound_scalar = upper_bound (int_niters_prolog + vfm1, th);
1255 return build_int_cst (type, *bound_scalar);
1257 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1258 bound (included) of niters of the prolog loop. */
1259 if (known_ge (th, vfm1 + bound_prolog))
1261 *bound_scalar = th;
1262 return build_int_cst (type, th);
1264 /* Need to do runtime comparison. */
1265 else if (maybe_gt (th, vfm1))
1267 *bound_scalar = upper_bound (*bound_scalar, th);
1268 return fold_build2 (MAX_EXPR, type,
1269 build_int_cst (type, th), niters);
1272 return niters;
1275 /* NITERS is the number of times that the original scalar loop executes
1276 after peeling. Work out the maximum number of iterations N that can
1277 be handled by the vectorized form of the loop and then either:
1279 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
1281 niters_vector = N
1283 b) set *STEP_VECTOR_PTR to one and generate:
1285 niters_vector = N / vf
1287 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
1288 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
1289 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
1291 void
1292 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1293 tree *niters_vector_ptr, tree *step_vector_ptr,
1294 bool niters_no_overflow)
1296 tree ni_minus_gap, var;
1297 tree niters_vector, step_vector, type = TREE_TYPE (niters);
1298 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1299 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1300 tree log_vf = NULL_TREE;
1302 /* If epilogue loop is required because of data accesses with gaps, we
1303 subtract one iteration from the total number of iterations here for
1304 correct calculation of RATIO. */
1305 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1307 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1308 build_one_cst (type));
1309 if (!is_gimple_val (ni_minus_gap))
1311 var = create_tmp_var (type, "ni_gap");
1312 gimple *stmts = NULL;
1313 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1314 true, var);
1315 gsi_insert_seq_on_edge_immediate (pe, stmts);
1318 else
1319 ni_minus_gap = niters;
1321 unsigned HOST_WIDE_INT const_vf;
1322 if (vf.is_constant (&const_vf))
1324 /* Create: niters >> log2(vf) */
1325 /* If it's known that niters == number of latch executions + 1 doesn't
1326 overflow, we can generate niters >> log2(vf); otherwise we generate
1327 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1328 will be at least one. */
1329 log_vf = build_int_cst (type, exact_log2 (const_vf));
1330 if (niters_no_overflow)
1331 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
1332 else
1333 niters_vector
1334 = fold_build2 (PLUS_EXPR, type,
1335 fold_build2 (RSHIFT_EXPR, type,
1336 fold_build2 (MINUS_EXPR, type,
1337 ni_minus_gap,
1338 build_int_cst (type, vf)),
1339 log_vf),
1340 build_int_cst (type, 1));
1341 step_vector = build_one_cst (type);
1343 else
1345 niters_vector = ni_minus_gap;
1346 step_vector = build_int_cst (type, vf);
1349 if (!is_gimple_val (niters_vector))
1351 var = create_tmp_var (type, "bnd");
1352 gimple_seq stmts = NULL;
1353 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1354 gsi_insert_seq_on_edge_immediate (pe, stmts);
1355 /* The peeling algorithm guarantees that the vector loop bound is at least
1356 ONE; we set range information to make the niters analyzer's life easier. */
1357 if (stmts != NULL && log_vf)
1358 set_range_info (niters_vector, VR_RANGE,
1359 wi::to_wide (build_int_cst (type, 1)),
1360 wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
1361 TYPE_MAX_VALUE (type),
1362 log_vf)));
1364 *niters_vector_ptr = niters_vector;
1365 *step_vector_ptr = step_vector;
1367 return;
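/* A minimal standalone sketch (not taken from GCC itself) of why the two
   forms computed above agree for niters >= vf, and why the second form is
   needed when NITERS may wrap to zero.  The 32-bit type and VF == 8 are
   example assumptions:

     #include <assert.h>
     #include <stdint.h>

     int
     main (void)
     {
       uint32_t vf = 8, log_vf = 3;
       // For niters >= vf, niters >> log2(vf) and
       // ((niters - vf) >> log2(vf)) + 1 give the same ratio.
       uint32_t niters = 100;
       assert ((niters >> log_vf) == ((niters - vf) >> log_vf) + 1);
       // If NITERS == number of latch executions + 1 wrapped to 0,
       // only the second form yields the correct 2^32 / vf.
       uint32_t wrapped = 0;
       assert (((wrapped - vf) >> log_vf) + 1 == (1u << 29));
       return 0;
     }  */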
1370 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1371 loop specified by LOOP_VINFO after vectorization, compute the number
1372 of iterations before vectorization (niters_vector * vf) and store it
1373 to NITERS_VECTOR_MULT_VF_PTR. */
1375 static void
1376 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1377 tree niters_vector,
1378 tree *niters_vector_mult_vf_ptr)
1380 /* We should be using a step_vector of VF if VF is variable. */
1381 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
1382 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1383 tree type = TREE_TYPE (niters_vector);
1384 tree log_vf = build_int_cst (type, exact_log2 (vf));
1385 basic_block exit_bb = single_exit (loop)->dest;
1387 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1388 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1389 niters_vector, log_vf);
1390 if (!is_gimple_val (niters_vector_mult_vf))
1392 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1393 gimple_seq stmts = NULL;
1394 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1395 &stmts, true, var);
1396 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1397 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1399 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1402 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1403 from SECOND/FIRST and puts it at the original loop's preheader/exit
1404 edge; the two loops are arranged as below:
1406 preheader_a:
1407 first_loop:
1408 header_a:
1409 i_1 = PHI<i_0, i_2>;
1411 i_2 = i_1 + 1;
1412 if (cond_a)
1413 goto latch_a;
1414 else
1415 goto between_bb;
1416 latch_a:
1417 goto header_a;
1419 between_bb:
1420 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1422 second_loop:
1423 header_b:
1424 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1425 or with i_2 if no LCSSA phi is created,
1426 depending on CREATE_LCSSA_FOR_IV_PHIS.
1428 i_4 = i_3 + 1;
1429 if (cond_b)
1430 goto latch_b;
1431 else
1432 goto exit_bb;
1433 latch_b:
1434 goto header_b;
1436 exit_bb:
1438 This function creates loop-closed SSA form for the first loop and updates
1439 the second loop's PHI nodes by replacing the argument on its incoming edge
1440 with the result of the newly created lcssa PHI nodes. If
1441 CREATE_LCSSA_FOR_IV_PHIS is false, loop-closed SSA phis are created only
1442 for the non-IV phis of the first loop.
1444 This function assumes exit bb of the first loop is preheader bb of the
1445 second loop, i.e., between_bb in the example code. With PHIs updated,
1446 the second loop will execute the remaining iterations of the first. */
1448 static void
1449 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1450 struct loop *first, struct loop *second,
1451 bool create_lcssa_for_iv_phis)
1453 gphi_iterator gsi_update, gsi_orig;
1454 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1456 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1457 edge second_preheader_e = loop_preheader_edge (second);
1458 basic_block between_bb = single_exit (first)->dest;
1460 gcc_assert (between_bb == second_preheader_e->src);
1461 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1462 /* Either the first loop or the second is the loop to be vectorized. */
1463 gcc_assert (loop == first || loop == second);
1465 for (gsi_orig = gsi_start_phis (first->header),
1466 gsi_update = gsi_start_phis (second->header);
1467 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1468 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1470 gphi *orig_phi = gsi_orig.phi ();
1471 gphi *update_phi = gsi_update.phi ();
1473 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1474 /* Generate lcssa PHI node for the first loop. */
1475 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1476 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1478 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1479 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1480 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1481 arg = new_res;
1484 /* Update PHI node in the second loop by replacing arg on the loop's
1485 incoming edge. */
1486 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1490 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1491 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1492 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1493 appear like below:
1495 guard_bb:
1496 if (cond)
1497 goto merge_bb;
1498 else
1499 goto skip_loop;
1501 skip_loop:
1502 header_a:
1503 i_1 = PHI<i_0, i_2>;
1505 i_2 = i_1 + 1;
1506 if (cond_a)
1507 goto latch_a;
1508 else
1509 goto exit_a;
1510 latch_a:
1511 goto header_a;
1513 exit_a:
1514 i_5 = PHI<i_2>;
1516 merge_bb:
1517 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1519 update_loop:
1520 header_b:
1521 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1523 i_4 = i_3 + 1;
1524 if (cond_b)
1525 goto latch_b;
1526 else
1527 goto exit_bb;
1528 latch_b:
1529 goto header_b;
1531 exit_bb:
1533 This function creates PHI nodes at merge_bb and replaces the use of i_5
1534 in the update_loop's PHI node with the new PHI result. */
1536 static void
1537 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1538 struct loop *update_loop,
1539 edge guard_edge, edge merge_edge)
1541 source_location merge_loc, guard_loc;
1542 edge orig_e = loop_preheader_edge (skip_loop);
1543 edge update_e = loop_preheader_edge (update_loop);
1544 gphi_iterator gsi_orig, gsi_update;
1546 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1547 gsi_update = gsi_start_phis (update_loop->header));
1548 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1549 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1551 gphi *orig_phi = gsi_orig.phi ();
1552 gphi *update_phi = gsi_update.phi ();
1554 /* Generate new phi node at merge bb of the guard. */
1555 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1556 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1558 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1559 args in NEW_PHI for these edges. */
1560 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1561 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1562 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1563 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1564 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1565 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1567 /* Update phi in UPDATE_PHI. */
1568 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1572 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1573 this function searches for the corresponding lcssa phi node in exit
1574 bb of LOOP. If it is found, return the phi result; otherwise return
1575 NULL. */
1577 static tree
1578 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1579 gphi *lcssa_phi)
1581 gphi_iterator gsi;
1582 edge e = single_exit (loop);
1584 gcc_assert (single_pred_p (e->dest));
1585 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1587 gphi *phi = gsi.phi ();
1588 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1589 PHI_ARG_DEF (lcssa_phi, 0), 0))
1590 return PHI_RESULT (phi);
1592 return NULL_TREE;
1595 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1596 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1597 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1598 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1599 The CFG looks like:
1601 loop:
1602 header_a:
1603 i_1 = PHI<i_0, i_2>;
1605 i_2 = i_1 + 1;
1606 if (cond_a)
1607 goto latch_a;
1608 else
1609 goto exit_a;
1610 latch_a:
1611 goto header_a;
1613 exit_a:
1615 guard_bb:
1616 if (cond)
1617 goto merge_bb;
1618 else
1619 goto epilog_loop;
1621 ;; fall_through_bb
1623 epilog_loop:
1624 header_b:
1625 i_3 = PHI<i_2, i_4>;
1627 i_4 = i_3 + 1;
1628 if (cond_b)
1629 goto latch_b;
1630 else
1631 goto merge_bb;
1632 latch_b:
1633 goto header_b;
1635 merge_bb:
1636 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1638 exit_bb:
1639 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1641 For each name used outside EPILOG (i.e. for each name that has an lcssa
1642 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1643 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1644 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1645 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1646 in exit_bb will also be updated. */
1648 static void
1649 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1650 edge guard_edge, edge merge_edge)
1652 gphi_iterator gsi;
1653 basic_block merge_bb = guard_edge->dest;
1655 gcc_assert (single_succ_p (merge_bb));
1656 edge e = single_succ_edge (merge_bb);
1657 basic_block exit_bb = e->dest;
1658 gcc_assert (single_pred_p (exit_bb));
1659 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1661 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1663 gphi *update_phi = gsi.phi ();
1664 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1665 /* This loop-closed-phi actually doesn't represent a use out of the
1666 loop - the phi arg is a constant. */
1667 if (TREE_CODE (old_arg) != SSA_NAME)
1668 continue;
1670 tree merge_arg = get_current_def (old_arg);
1671 if (!merge_arg)
1672 merge_arg = old_arg;
1674 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1675 /* If the var is live after loop but not a reduction, we simply
1676 use the old arg. */
1677 if (!guard_arg)
1678 guard_arg = old_arg;
1680 /* Create new phi node in MERGE_BB: */
1681 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1682 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1684 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set
1685 the two PHI args in merge_phi for these edges. */
1686 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1687 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1689 /* Update the original phi in exit_bb. */
1690 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1694 /* EPILOG loop is duplicated from the original loop for vectorizing,
1695 the arg of its loop closed ssa PHI needs to be updated. */
1697 static void
1698 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1700 gphi_iterator gsi;
1701 basic_block exit_bb = single_exit (epilog)->dest;
1703 gcc_assert (single_pred_p (exit_bb));
1704 edge e = EDGE_PRED (exit_bb, 0);
1705 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1706 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1709 /* Function vect_do_peeling.
1711 Input:
1712 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1714 preheader:
1715 LOOP:
1716 header_bb:
1717 loop_body
1718 if (exit_loop_cond) goto exit_bb
1719 else goto header_bb
1720 exit_bb:
1722 - NITERS: The number of iterations of the loop.
1723 - NITERSM1: The number of iterations of the loop's latch.
1724 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1725 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1726 CHECK_PROFITABILITY is true.
1727 Output:
1728 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
1729 iterate after vectorization; see slpeel_make_loop_iterate_ntimes
1730 for details.
1731 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
1732 should be set to the number of scalar iterations handled by the
1733 vector loop. The SSA name is only used on exit from the loop.
1735 This function peels prolog and epilog from the loop, adds guards skipping
1736 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1737 would look like:
1739 guard_bb_1:
1740 if (prefer_scalar_loop) goto merge_bb_1
1741 else goto guard_bb_2
1743 guard_bb_2:
1744 if (skip_prolog) goto merge_bb_2
1745 else goto prolog_preheader
1747 prolog_preheader:
1748 PROLOG:
1749 prolog_header_bb:
1750 prolog_body
1751 if (exit_prolog_cond) goto prolog_exit_bb
1752 else goto prolog_header_bb
1753 prolog_exit_bb:
1755 merge_bb_2:
1757 vector_preheader:
1758 VECTOR LOOP:
1759 vector_header_bb:
1760 vector_body
1761 if (exit_vector_cond) goto vector_exit_bb
1762 else goto vector_header_bb
1763 vector_exit_bb:
1765 guard_bb_3:
1766 if (skip_epilog) goto merge_bb_3
1767 else goto epilog_preheader
1769 merge_bb_1:
1771 epilog_preheader:
1772 EPILOG:
1773 epilog_header_bb:
1774 epilog_body
1775 if (exit_epilog_cond) goto merge_bb_3
1776 else goto epilog_header_bb
1778 merge_bb_3:
1780 Note that this function peels the prolog and epilog, and adds the
1781 corresponding guards, only when necessary.
1782 Returns created epilogue or NULL.
1784 TODO: Guard for prefer_scalar_loop should be emitted along with
1785 versioning conditions if loop versioning is needed. */
1788 struct loop *
1789 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1790 tree *niters_vector, tree *step_vector,
1791 tree *niters_vector_mult_vf_var, int th,
1792 bool check_profitability, bool niters_no_overflow)
1794 edge e, guard_e;
1795 tree type = TREE_TYPE (niters), guard_cond;
1796 basic_block guard_bb, guard_to;
1797 profile_probability prob_prolog, prob_vector, prob_epilog;
1798 int bound_prolog = 0;
1799 poly_uint64 bound_scalar = 0;
1800 int estimated_vf;
1801 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1802 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1803 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1805 if (!prolog_peeling && !epilog_peeling)
1806 return NULL;
1808 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
1809 estimated_vf = vect_vf_for_cost (loop_vinfo);
1810 if (estimated_vf == 2)
1811 estimated_vf = 3;
1812 prob_prolog = prob_epilog = profile_probability::guessed_always ()
1813 .apply_scale (estimated_vf - 1, estimated_vf);
1814 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1816 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1817 struct loop *first_loop = loop;
1818 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1819 create_lcssa_for_virtual_phi (loop);
1820 update_ssa (TODO_update_ssa_only_virtuals);
1822 if (MAY_HAVE_DEBUG_BIND_STMTS)
1824 gcc_assert (!adjust_vec.exists ());
1825 adjust_vec.create (32);
1827 initialize_original_copy_tables ();
1829 /* Prolog loop may be skipped. */
1830 bool skip_prolog = (prolog_peeling != 0);
1831 /* Skip to the epilog if the scalar loop may be preferred. It's only
1832 needed when we peel for the epilog loop and when the condition hasn't
1833 already been checked by loop versioning. */
1834 bool skip_vector = ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1835 && !LOOP_REQUIRES_VERSIONING (loop_vinfo))
1836 || !vf.is_constant ());
1837 /* The epilog loop must be executed if the number of iterations for the
1838 epilog loop is known at compile time; otherwise we need to add a check
1839 at the end of the vector loop and skip to the end of the epilog loop. */
1840 bool skip_epilog = (prolog_peeling < 0
1841 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1842 || !vf.is_constant ());
1843 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1844 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1845 skip_epilog = false;
1847 /* Record the anchor bb at which guard should be placed if scalar loop
1848 may be preferred. */
1849 basic_block anchor = loop_preheader_edge (loop)->src;
1850 if (skip_vector)
1852 split_edge (loop_preheader_edge (loop));
1854 /* Due to the order in which we peel prolog and epilog, we first
1855 propagate probability to the whole loop. The purpose is to
1856 avoid adjusting probabilities of both prolog and vector loops
1857 separately. Note in this case, the probability of epilog loop
1858 needs to be scaled back later. */
1859 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1860 if (prob_vector.initialized_p ())
1862 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1863 scale_loop_profile (loop, prob_vector, 0);
1867 tree niters_prolog = build_int_cst (type, 0);
1868 source_location loop_loc = find_loop_location (loop);
1869 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1870 if (prolog_peeling)
1872 e = loop_preheader_edge (loop);
1873 if (!slpeel_can_duplicate_loop_p (loop, e))
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1876 "loop can't be duplicated to preheader edge.\n");
1877 gcc_unreachable ();
1879 /* Peel prolog and put it on preheader edge of loop. */
1880 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1881 if (!prolog)
1883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1884 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1885 gcc_unreachable ();
1887 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1888 first_loop = prolog;
1889 reset_original_copy_tables ();
1891 /* Generate and update the number of iterations for prolog loop. */
1892 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1893 &bound_prolog);
1894 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
1895 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog, step_prolog,
1896 NULL_TREE, false);
1898 /* Skip the prolog loop. */
1899 if (skip_prolog)
1901 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1902 niters_prolog, build_int_cst (type, 0));
1903 guard_bb = loop_preheader_edge (prolog)->src;
1904 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1905 guard_to = split_edge (loop_preheader_edge (loop));
1906 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1907 guard_to, guard_bb,
1908 prob_prolog.invert (),
1909 irred_flag);
1910 e = EDGE_PRED (guard_to, 0);
1911 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1912 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1914 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1915 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1917 /* Update init address of DRs. */
1918 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1919 /* Update niters for vector loop. */
1920 LOOP_VINFO_NITERS (loop_vinfo)
1921 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1922 LOOP_VINFO_NITERSM1 (loop_vinfo)
1923 = fold_build2 (MINUS_EXPR, type,
1924 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1925 bool new_var_p = false;
1926 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1927 /* It's guaranteed that the vector loop bound before vectorization is at
1928 least VF, so set range information for the newly generated var. */
1929 if (new_var_p)
1930 set_range_info (niters, VR_RANGE,
1931 wi::to_wide (build_int_cst (type, vf)),
1932 wi::to_wide (TYPE_MAX_VALUE (type)));
1934 /* The prolog iterates at most bound_prolog times; its latch iterates at
1935 most bound_prolog - 1 times. */
1936 record_niter_bound (prolog, bound_prolog - 1, false, true);
1937 delete_update_ssa ();
1938 adjust_vec_debug_stmts ();
1939 scev_reset ();
1942 if (epilog_peeling)
1944 e = single_exit (loop);
1945 if (!slpeel_can_duplicate_loop_p (loop, e))
1947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1948 "loop can't be duplicated to exit edge.\n");
1949 gcc_unreachable ();
1951 /* Peel epilog and put it on exit edge of loop. */
1952 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1953 if (!epilog)
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1956 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1957 gcc_unreachable ();
1959 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1961 /* The scalar version of the loop may be preferred. In this case, add
1962 a guard and skip to the epilog. Note this only happens when the number
1963 of iterations of the loop is unknown at compile time; otherwise this
1964 won't be vectorized. */
1965 if (skip_vector)
1967 /* An additional epilogue iteration is peeled if a gap exists. */
1968 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1969 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1970 bound_prolog,
1971 peel_for_gaps ? vf : vf - 1,
1972 th, &bound_scalar,
1973 check_profitability);
1974 /* Build guard against NITERSM1 since NITERS may overflow. */
1975 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1976 guard_bb = anchor;
1977 guard_to = split_edge (loop_preheader_edge (epilog));
1978 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1979 guard_to, guard_bb,
1980 prob_vector.invert (),
1981 irred_flag);
1982 e = EDGE_PRED (guard_to, 0);
1983 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1984 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1986 /* Simply propagate profile info from guard_bb to guard_to which is
1987 a merge point of control flow. */
1988 guard_to->count = guard_bb->count;
1990 /* Scale probability of epilog loop back.
1991 FIXME: We should avoid scaling down and back up. Profile may
1992 get lost if we scale down to 0. */
1993 basic_block *bbs = get_loop_body (epilog);
1994 for (unsigned int i = 0; i < epilog->num_nodes; i++)
1995 bbs[i]->count = bbs[i]->count.apply_scale
1996 (bbs[i]->count,
1997 bbs[i]->count.apply_probability
1998 (prob_vector));
1999 free (bbs);
2002 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
2003 tree niters_vector_mult_vf;
2004 /* If the loop is peeled a non-zero constant number of times, niters now
2005 refers to orig_niters - prolog_peeling; it won't overflow even if
2006 orig_niters overflows. */
2007 niters_no_overflow |= (prolog_peeling > 0);
2008 vect_gen_vector_loop_niters (loop_vinfo, niters,
2009 niters_vector, step_vector,
2010 niters_no_overflow);
2011 if (!integer_onep (*step_vector))
2013 /* On exit from the loop we will have an easy way of calculating
2014 NITERS_VECTOR / STEP * STEP. Install a dummy definition
2015 until then. */
2016 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
2017 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
2018 *niters_vector_mult_vf_var = niters_vector_mult_vf;
2020 else
2021 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2022 &niters_vector_mult_vf);
2023 /* Update IVs of original loop as if they were advanced by
2024 niters_vector_mult_vf steps. */
2025 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2026 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
2027 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2028 update_e);
2030 if (skip_epilog)
2032 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2033 niters, niters_vector_mult_vf);
2034 guard_bb = single_exit (loop)->dest;
2035 guard_to = split_edge (single_exit (epilog));
2036 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
2037 skip_vector ? anchor : guard_bb,
2038 prob_epilog.invert (),
2039 irred_flag);
2040 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
2041 single_exit (epilog));
2042 /* Only need to handle basic block before epilog loop if it's not
2043 the guard_bb, which is the case when skip_vector is true. */
2044 if (guard_bb != bb_before_epilog)
2046 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
2048 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
2050 scale_loop_profile (epilog, prob_epilog, 0);
2052 else
2053 slpeel_update_phi_nodes_for_lcssa (epilog);
2055 unsigned HOST_WIDE_INT bound1, bound2;
2056 if (vf.is_constant (&bound1) && bound_scalar.is_constant (&bound2))
2058 bound1 -= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 2;
2059 if (bound2)
2060 /* We share epilog loop with scalar version loop. */
2061 bound1 = MAX (bound1, bound2 - 1);
2062 record_niter_bound (epilog, bound1, false, true);
2065 delete_update_ssa ();
2066 adjust_vec_debug_stmts ();
2067 scev_reset ();
2069 adjust_vec.release ();
2070 free_original_copy_tables ();
2072 return epilog;
2075 /* Function vect_create_cond_for_niters_checks.
2077 Create a conditional expression that represents the run-time checks for
2078 the loop's niter. The loop is guaranteed to terminate if the run-time
2079 checks hold.
2081 Input:
2082 COND_EXPR - input conditional expression. New conditions will be chained
2083 with a logical AND operation. If it is NULL, the niter
2084 assumptions become the only condition.
2085 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
2086 assumptions to be checked.
2088 Output:
2089 COND_EXPR - conditional expression.
2091 The returned COND_EXPR is the conditional expression to be used in the
2092 if statement that controls which version of the loop gets executed at
2093 runtime. */
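/* Editor's sketch (hypothetical source loop, not taken from GCC): niter
   analysis of a loop such as

     for (unsigned int i = 0; i != n; i += 4)
       a[i] = 0;

   can only prove termination under the assumption n % 4 == 0.  Such
   assumptions are what LOOP_VINFO_NITERS_ASSUMPTIONS carries, and the
   function below ANDs them into the versioning condition so the scalar
   loop runs whenever they do not hold.  */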
2095 static void
2096 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2098 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2100 if (*cond_expr)
2101 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2102 *cond_expr, part_cond_expr);
2103 else
2104 *cond_expr = part_cond_expr;
2107 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2108 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
2110 static void
2111 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
2113 if (*cond_expr)
2114 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2115 *cond_expr, part_cond_expr);
2116 else
2117 *cond_expr = part_cond_expr;
2120 /* Function vect_create_cond_for_align_checks.
2122 Create a conditional expression that represents the alignment checks for
2123 all of the data references (array element references) whose alignment must
2124 checked at runtime.
2126 Input:
2127 COND_EXPR - input conditional expression. New conditions will be chained
2128 with logical AND operation.
2129 LOOP_VINFO - two fields of the loop information are used.
2130 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2131 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2133 Output:
2134 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2135 expression.
2136 The returned value is the conditional expression to be used in the if
2137 statement that controls which version of the loop gets executed at runtime.
2139 The algorithm makes two assumptions:
2140 1) The number of bytes "n" in a vector is a power of 2.
2141 2) An address "a" is aligned if a%n is zero, and this
2142 test can be done as a&(n-1) == 0. For example, for
2143 16-byte vectors the test is a&0xf == 0. */
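/* Editor's sketch (illustrative only; the function name and parameters are
   hypothetical): the runtime test built below ORs the addresses together
   and masks with n - 1, so a single comparison covers every data reference:

     #include <stdbool.h>
     #include <stdint.h>

     static bool
     all_aligned_p (uintptr_t addr1, uintptr_t addr2, uintptr_t n)
     {
       // n is assumed to be a power of 2, so n - 1 is an all-ones mask.
       return ((addr1 | addr2) & (n - 1)) == 0;
     }  */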
2145 static void
2146 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2147 tree *cond_expr,
2148 gimple_seq *cond_expr_stmt_list)
2150 vec<gimple *> may_misalign_stmts
2151 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2152 gimple *ref_stmt;
2153 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2154 tree mask_cst;
2155 unsigned int i;
2156 tree int_ptrsize_type;
2157 char tmp_name[20];
2158 tree or_tmp_name = NULL_TREE;
2159 tree and_tmp_name;
2160 gimple *and_stmt;
2161 tree ptrsize_zero;
2162 tree part_cond_expr;
2164 /* Check that mask is one less than a power of 2, i.e., mask is
2165 all zeros followed by all ones. */
2166 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2168 int_ptrsize_type = signed_type_for (ptr_type_node);
2170 /* Create the expression (mask & (dr_1 | ... | dr_n)) where dr_i is the
2171 address of the first vector of the i'th data reference. */
2173 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2175 gimple_seq new_stmt_list = NULL;
2176 tree addr_base;
2177 tree addr_tmp_name;
2178 tree new_or_tmp_name;
2179 gimple *addr_stmt, *or_stmt;
2180 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2181 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2182 bool negative = tree_int_cst_compare
2183 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2184 tree offset = negative
2185 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2187 /* create: addr_tmp = (int)(address_of_first_vector) */
2188 addr_base =
2189 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2190 offset);
2191 if (new_stmt_list != NULL)
2192 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2194 sprintf (tmp_name, "addr2int%d", i);
2195 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2196 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2197 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2199 /* The addresses are ORed together. */
2201 if (or_tmp_name != NULL_TREE)
2203 /* create: or_tmp = or_tmp | addr_tmp */
2204 sprintf (tmp_name, "orptrs%d", i);
2205 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2206 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2207 or_tmp_name, addr_tmp_name);
2208 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2209 or_tmp_name = new_or_tmp_name;
2211 else
2212 or_tmp_name = addr_tmp_name;
2214 } /* end for i */
2216 mask_cst = build_int_cst (int_ptrsize_type, mask);
2218 /* create: and_tmp = or_tmp & mask */
2219 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2221 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2222 or_tmp_name, mask_cst);
2223 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2225 /* Make and_tmp the left operand of the conditional test against zero.
2226 If and_tmp has a nonzero bit then some address is unaligned. */
2227 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2228 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2229 and_tmp_name, ptrsize_zero);
2230 chain_cond_expr (cond_expr, part_cond_expr);
2233 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2234 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2235 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2236 and this new condition are true. Treat a null *COND_EXPR as "true". */
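/* Editor's sketch (hypothetical names): for two pairs <A1, B1> and
   <A2, B2> the condition built below is equivalent to:

     static _Bool
     objects_distinct_p (const void *a1, const void *b1,
                         const void *a2, const void *b2)
     {
       // One inequality per pair, chained with logical AND.
       return a1 != b1 && a2 != b2;
     }  */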
2238 static void
2239 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2241 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2242 unsigned int i;
2243 vec_object_pair *pair;
2244 FOR_EACH_VEC_ELT (pairs, i, pair)
2246 tree addr1 = build_fold_addr_expr (pair->first);
2247 tree addr2 = build_fold_addr_expr (pair->second);
2248 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2249 addr1, addr2);
2250 chain_cond_expr (cond_expr, part_cond_expr);
2254 /* Function vect_create_cond_for_alias_checks.
2256 Create a conditional expression that represents the run-time checks for
2257 overlapping of the address ranges represented by a list of data reference
2258 relations passed as input.
2260 Input:
2261 COND_EXPR - input conditional expression. New conditions will be chained
2262 with logical AND operation. If it is NULL, then the function
2263 is used to return the number of alias checks.
2264 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2265 to be checked.
2267 Output:
2268 COND_EXPR - conditional expression.
2270 The returned COND_EXPR is the conditional expression to be used in the if
2271 statement that controls which version of the loop gets executed at runtime.
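/* Editor's sketch (illustrative only; this is the classic segment test,
   not necessarily the exact form create_runtime_alias_checks emits): one
   such alias check verifies that two accessed segments are disjoint:

     #include <stdbool.h>
     #include <stdint.h>

     static bool
     segments_disjoint_p (uintptr_t a_start, uintptr_t a_len,
                          uintptr_t b_start, uintptr_t b_len)
     {
       // No overlap if either segment ends before the other begins.
       return a_start + a_len <= b_start || b_start + b_len <= a_start;
     }  */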
2274 void
2275 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2277 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2278 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2280 if (comp_alias_ddrs.is_empty ())
2281 return;
2283 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2284 &comp_alias_ddrs, cond_expr);
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE, vect_location,
2287 "created %u versioning for alias checks.\n",
2288 comp_alias_ddrs.length ());
2292 /* Function vect_loop_versioning.
2294 If the loop has data references that may or may not be aligned and/or
2295 has data reference relations whose independence was not proven, then
2296 two versions of the loop need to be generated, one which is vectorized
2297 and one which isn't. A test is then generated to control which of the
2298 loops is executed. The test checks for the alignment of all of the
2299 data references that may or may not be aligned. An additional
2300 sequence of runtime tests is generated for each pair of DDRs whose
2301 independence was not proven. The vectorized version of the loop is
2302 executed only if both the alias and alignment tests pass.
2304 The test generated to check which version of loop is executed
2305 is modified to also check for profitability as indicated by the
2306 cost model threshold TH.
2308 The versioning precondition(s) are placed in *COND_EXPR and
2309 *COND_EXPR_STMT_LIST. */
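/* Editor's sketch (hypothetical names; not code GCC emits): the versioned
   result has the overall shape

     extern void run_vectorized_loop (void);
     extern void run_scalar_loop (void);

     static void
     run_versioned (_Bool aligned_ok, _Bool no_alias, _Bool niters_ok)
     {
       // COND_EXPR: profitability, alignment, alias and niter checks ANDed.
       if (aligned_ok && no_alias && niters_ok)
         run_vectorized_loop ();
       else
         run_scalar_loop ();
     }  */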
2311 void
2312 vect_loop_versioning (loop_vec_info loop_vinfo,
2313 unsigned int th, bool check_profitability,
2314 poly_uint64 versioning_threshold)
2316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2317 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2318 basic_block condition_bb;
2319 gphi_iterator gsi;
2320 gimple_stmt_iterator cond_exp_gsi;
2321 basic_block merge_bb;
2322 basic_block new_exit_bb;
2323 edge new_exit_e, e;
2324 gphi *orig_phi, *new_phi;
2325 tree cond_expr = NULL_TREE;
2326 gimple_seq cond_expr_stmt_list = NULL;
2327 tree arg;
2328 profile_probability prob = profile_probability::likely ();
2329 gimple_seq gimplify_stmt_list = NULL;
2330 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2331 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2332 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2333 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2335 if (check_profitability)
2336 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2337 build_int_cst (TREE_TYPE (scalar_loop_iters),
2338 th - 1));
2339 if (maybe_ne (versioning_threshold, 0U))
2341 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2342 build_int_cst (TREE_TYPE (scalar_loop_iters),
2343 versioning_threshold - 1));
2344 if (cond_expr)
2345 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
2346 expr, cond_expr);
2347 else
2348 cond_expr = expr;
2351 if (version_niter)
2352 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2354 if (cond_expr)
2355 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2356 is_gimple_condexpr, NULL_TREE);
2358 if (version_align)
2359 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2360 &cond_expr_stmt_list);
2362 if (version_alias)
2364 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
2365 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2368 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2369 is_gimple_condexpr, NULL_TREE);
2370 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2372 initialize_original_copy_tables ();
2373 if (scalar_loop)
2375 edge scalar_e;
2376 basic_block preheader, scalar_preheader;
2378 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2379 scale LOOP's frequencies instead. */
2380 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2381 prob, prob.invert (), prob, prob.invert (), true);
2382 scale_loop_frequencies (loop, prob);
2383 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2384 while we need to move it above LOOP's preheader. */
2385 e = loop_preheader_edge (loop);
2386 scalar_e = loop_preheader_edge (scalar_loop);
2387 gcc_assert (empty_block_p (e->src)
2388 && single_pred_p (e->src));
2389 gcc_assert (empty_block_p (scalar_e->src)
2390 && single_pred_p (scalar_e->src));
2391 gcc_assert (single_pred_p (condition_bb));
2392 preheader = e->src;
2393 scalar_preheader = scalar_e->src;
2394 scalar_e = find_edge (condition_bb, scalar_preheader);
2395 e = single_pred_edge (preheader);
2396 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2397 scalar_preheader);
2398 redirect_edge_and_branch_force (scalar_e, preheader);
2399 redirect_edge_and_branch_force (e, condition_bb);
2400 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2401 single_pred (condition_bb));
2402 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2403 single_pred (scalar_preheader));
2404 set_immediate_dominator (CDI_DOMINATORS, preheader,
2405 condition_bb);
2407 else
2408 nloop = loop_version (loop, cond_expr, &condition_bb,
2409 prob, prob.invert (), prob, prob.invert (), true);
2411 if (version_niter)
2413 /* The versioned loop could be infinite; we need to clear the existing
2414 niter information, which was copied from the original loop. */
2415 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2416 vect_free_loop_info_assumptions (nloop);
2417 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2418 loop_constraint_set (loop, LOOP_C_INFINITE);
2421 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2422 && dump_enabled_p ())
2424 if (version_alias)
2425 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2426 "loop versioned for vectorization because of "
2427 "possible aliasing\n");
2428 if (version_align)
2429 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2430 "loop versioned for vectorization to enhance "
2431 "alignment\n");
2434 free_original_copy_tables ();
2436 /* Loop versioning violates an assumption we try to maintain during
2437 vectorization - that the loop exit block has a single predecessor.
2438 After versioning, the exit block of both loop versions is the same
2439 basic block (i.e. it has two predecessors). Just in order to simplify
2440 following transformations in the vectorizer, we fix this situation
2441 here by adding a new (empty) block on the exit-edge of the loop,
2442 with the proper loop-exit phis to maintain loop-closed-form.
2443 If loop versioning wasn't done from loop but from scalar_loop instead,
2444 merge_bb will already have just a single predecessor. */
2446 merge_bb = single_exit (loop)->dest;
2447 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2449 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2450 new_exit_bb = split_edge (single_exit (loop));
2451 new_exit_e = single_exit (loop);
2452 e = EDGE_SUCC (new_exit_bb, 0);
2454 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2456 tree new_res;
2457 orig_phi = gsi.phi ();
2458 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2459 new_phi = create_phi_node (new_res, new_exit_bb);
2460 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2461 add_phi_arg (new_phi, arg, new_exit_e,
2462 gimple_phi_arg_location_from_edge (orig_phi, e));
2463 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2467 /* End loop-exit-fixes after versioning. */
2469 if (cond_expr_stmt_list)
2471 cond_exp_gsi = gsi_last_bb (condition_bb);
2472 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2473 GSI_SAME_STMT);
2475 update_ssa (TODO_update_ssa);