/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
#include "gimple-fold.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_BIND_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_BIND_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_BIND_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}
/* Make LOOP iterate N == (NITERS - STEP) / STEP + 1 times,
   where NITERS is known to be outside the range [1, STEP - 1].
   This is equivalent to making the loop execute NITERS / STEP
   times when NITERS is nonzero and (1 << M) / STEP times otherwise,
   where M is the precision of NITERS.

   NITERS_MAYBE_ZERO is true if NITERS can be zero, false if it is known
   to be >= STEP.  In the latter case N is always NITERS / STEP.

   If FINAL_IV is nonnull, it is an SSA name that should be set to
   N * STEP on exit from the loop.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
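/* A worked example, with illustrative values not taken from the comment
   above: for STEP == 4 and a 32-bit NITERS, NITERS == 10 gives
   N == (10 - 4) / 4 + 1 == 2 == 10 / 4, while NITERS == 0 wraps around
   and gives N == 2^32 / 4 == 2^30.  */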
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters, tree step,
                                 tree final_iv, bool niters_maybe_zero)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge pe = loop_preheader_edge (loop);
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  source_location loop_loc;
  enum tree_code code;
  tree niters_type = TREE_TYPE (niters);

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  tree init, limit;
  if (!niters_maybe_zero && integer_onep (step))
    {
      /* In this case we can use a simple 0-based IV:

         A:
           x = 0;
           do
             {
               ...
               x += 1;
             }
           while (x < NITERS);  */
      code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
      init = build_zero_cst (niters_type);
      limit = niters;
    }
  else
    {
      /* The following works for all values of NITERS except 0:

         B:
           x = 0;
           do
             {
               ...
               x += STEP;
             }
           while (x <= NITERS - STEP);

         so that the loop continues to iterate if x + STEP - 1 < NITERS
         but stops if x + STEP - 1 >= NITERS.

         However, if NITERS is zero, x never hits a value above NITERS - STEP
         before wrapping around.  There are two obvious ways of dealing with
         this:

         - start at STEP - 1 and compare x before incrementing it
         - start at -1 and compare x after incrementing it

         The latter is simpler and is what we use.  The loop in this case
         looks like:

         C:
           x = -1;
           do
             {
               ...
               x += STEP;
             }
           while (x < NITERS - STEP);

         In both cases the loop limit is NITERS - STEP.  */
      gimple_seq seq = NULL;
      limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
      limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
      if (niters_maybe_zero)
        {
          /* Case C.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
          init = build_all_ones_cst (niters_type);
        }
      else
        {
          /* Case B.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
          init = build_zero_cst (niters_type);
        }
    }

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

  cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  if (limit == niters)
    /* Case A: the loop iterates NITERS times.  Subtract one to get the
       latch count.  */
    loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
                                       build_int_cst (niters_type, 1));
  else
    /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
       Subtract one from this to get the latch count.  */
    loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
                                       limit, step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
                                             indx_after_incr, init);
      gsi_insert_on_edge_immediate (single_exit (loop), assign);
    }
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest from those
   edges ensure that each TO->dest PHI argument has its current_def
   set to that of the corresponding FROM->dest argument.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks (a header and an empty latch)
       for an innermost loop, or 5 basic blocks for an outer loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case in which loop->outer is
     NULL is for the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
              = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
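/* A worked example (illustrative, not from the original comment): a simple
   IV such as {0, +, 4}_1 has evolution part 4, which is loop-invariant and
   not itself a chrec, so it can be advanced; an IV whose step is again a
   chrec, e.g. {0, +, {1, +, 1}_1}_1, is rejected by the checks below.  */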
bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi.  skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are polynomials of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */
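/* A worked example, with illustrative values not taken from the comment
   above: an IV with initial value 0 and step 4, in a loop with
   NITERS == 100, ends up as 0 + 100 * 4 == 400; that value becomes the
   PHI arg on UPDATE_E for the epilog loop's header PHI.  */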
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi.  skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store the upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the
   step (which is equal to the size of the interleaved group).

   The above formulas assume that VF == number of elements in the vector.
   This may not hold when there are multiple types in the loop.
   In this case, for some data-references in the loop the VF does not
   represent the number of elements that fit in the vector.  Therefore,
   instead of VF we use TYPE_VECTOR_SUBPARTS.  */
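/* A worked example, with illustrative values not taken from the comment
   above: for V4SI accesses (elem_size == 4, VF == 4), target_align == 16
   and a start address misaligned by 8 bytes, misalign_in_elems == 8 / 4
   == 2 and prolog_niters == (4 - 2) & 3 == 2: peeling two scalar
   iterations advances the address by 8 bytes to a 16-byte boundary.  */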
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int target_align = DR_TARGET_ALIGNMENT (dr);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                              &stmts, offset);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree target_align_minus_1 = build_int_cst (type, target_align - 1);
      HOST_WIDE_INT elem_size
        = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      HOST_WIDE_INT align_in_elems = target_align / elem_size;
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);
      tree misalign_in_bytes;
      tree misalign_in_elems;

      /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
      misalign_in_bytes
        = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                       target_align_minus_1);

      /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
      misalign_in_elems
        = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
                                 & (align_in_elems - 1)).  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
                             align_in_elems_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
                             misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_drs ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
   it to TRUE if a new ssa_var is generated.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        {
          gsi_insert_seq_on_edge_immediate (pe, stmts);
          if (new_var_p != NULL)
            *new_var_p = true;
        }

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred to the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (included) of the number of iterations of the prolog loop.  VFM1 is
   the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is the
   threshold below which the scalar (rather than vectorized) loop will be
   executed.  This function stores the upper bound (included) of the result
   in BOUND_SCALAR.  */
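/* A worked example, with illustrative values not taken from the comment
   above: with VFM1 == 3 and an unknown NITERS_PROLOG bounded by
   BOUND_PROLOG == 3, the result is niters_prolog + 3 and
   *BOUND_SCALAR == 6; the profitability threshold TH can only raise
   these values.  */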
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
/* NITERS is the number of times that the original scalar loop executes
   after peeling.  Work out the maximum number of iterations N that can
   be handled by the vectorized form of the loop and then either:

   a) set *STEP_VECTOR_PTR to the vectorization factor and generate:

        niters_vector = N

   b) set *STEP_VECTOR_PTR to one and generate:

        niters_vector = N / vf

   In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
   any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
   is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).  */
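/* A worked example, with illustrative values not taken from the comment
   above: with vf == 4 and NITERS == 10, variant b) computes
   niters_vector == 10 >> 2 == 2 when NITERS is known not to overflow, or
   ((10 - 4) >> 2) + 1 == 2 otherwise; either way the vector loop runs
   twice and the remaining 2 iterations go to the epilog loop.  */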
void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, tree *step_vector_ptr,
                             bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector, step_vector, type = TREE_TYPE (niters);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = NULL_TREE;

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
                                  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (type, "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  if (1)
    {
      /* Create: niters >> log2(vf).  */
      /* If it's known that niters == number of latch executions + 1 doesn't
         overflow, we can generate niters >> log2(vf); otherwise we generate
         (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
         will be at least one.  */
      log_vf = build_int_cst (type, exact_log2 (vf));
      if (niters_no_overflow)
        niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
      else
        niters_vector
          = fold_build2 (PLUS_EXPR, type,
                         fold_build2 (RSHIFT_EXPR, type,
                                      fold_build2 (MINUS_EXPR, type,
                                                   ni_minus_gap,
                                                   build_int_cst (type, vf)),
                                      log_vf),
                         build_int_cst (type, 1));
      step_vector = build_one_cst (type);
    }
  else
    {
      niters_vector = ni_minus_gap;
      step_vector = build_int_cst (type, vf);
    }

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
         we set range information to make niters analyzer's life easier.  */
      if (stmts != NULL && log_vf)
        set_range_info (niters_vector, VR_RANGE,
                        wi::to_wide (build_int_cst (type, 1)),
                        wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
                                                  TYPE_MAX_VALUE (type),
                                                  log_vf)));
    }
  *niters_vector_ptr = niters_vector;
  *step_vector_ptr = step_vector;

  return;
}
/* Given NITERS_VECTOR which is the number of iterations for the vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */
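/* A worked example (illustrative values only): with vf == 4 the shift
   amount is log_vf == 2, so niters_vector == 2 yields 2 << 2 == 8 scalar
   iterations.  */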
static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>;  ;; Use of i_0 to be replaced with i_x,
                                  or with i_2 if no LCSSA phi is created
                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; it updates the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of the newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */
static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

     exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of the new PHI node.  */
static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP.
   This function searches for the corresponding lcssa phi node in the exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL_TREE.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto epilog_loop;

     ;; fall_through_bb

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e. - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */
static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
         loop_body
         if (exit_loop_cond) goto exit_bb
         else                goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
     CHECK_PROFITABILITY is true.

   Output:
   - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
     iterate after vectorization; see slpeel_make_loop_iterate_ntimes
     for details.
   - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
     should be set to the number of scalar iterations handled by the
     vector loop.  The SSA name is only used on exit from the loop.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

   guard_bb_1:
     if (prefer_scalar_loop) goto merge_bb_1
     else                    goto guard_bb_2

   guard_bb_2:
     if (skip_prolog) goto merge_bb_2
     else             goto prolog_preheader

   prolog_preheader:
   PROLOG:
     prolog_header_bb:
       prolog_body
       if (exit_prolog_cond) goto prolog_exit_bb
       else                  goto prolog_header_bb
     prolog_exit_bb:

   merge_bb_2:

   vector_preheader:
   VECTOR LOOP:
     vector_header_bb:
       vector_body
       if (exit_vector_cond) goto vector_exit_bb
       else                  goto vector_header_bb
     vector_exit_bb:

   guard_bb_3:
     if (skip_epilog) goto merge_bb_3
     else             goto epilog_preheader

   merge_bb_1:

   epilog_preheader:
   EPILOG:
     epilog_header_bb:
       epilog_body
       if (exit_epilog_cond) goto merge_bb_3
       else                  goto epilog_header_bb

   merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.
   Returns the created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
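/* A worked example, with illustrative values not taken from the comment
   above: with VF == 4 and NITERS == 23, if the prolog peels 1 iteration
   to reach an aligned address, the vector loop covers 20 of the remaining
   22 iterations in 5 vector iterations and the epilog runs the last 2.  */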
1783 struct loop *
1784 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1785 tree *niters_vector, tree *step_vector,
1786 tree *niters_vector_mult_vf_var, int th,
1787 bool check_profitability, bool niters_no_overflow)
1789 edge e, guard_e;
1790 tree type = TREE_TYPE (niters), guard_cond;
1791 basic_block guard_bb, guard_to;
1792 profile_probability prob_prolog, prob_vector, prob_epilog;
1793 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1794 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1795 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1796 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1797 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1799 if (!prolog_peeling && !epilog_peeling)
1800 return NULL;
1802 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
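/* The prolog/epilog body probability is (VF - 1) / VF; with VF == 2
   that would be only 1/2, so presumably the factor is bumped to 3
   below to get 2/3 instead.  */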
1803 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1804 vf = 3;
1805 prob_prolog = prob_epilog = profile_probability::guessed_always ()
1806 .apply_scale (vf - 1, vf);
1807 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1809 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1810 struct loop *first_loop = loop;
1811 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1812 create_lcssa_for_virtual_phi (loop);
1813 update_ssa (TODO_update_ssa_only_virtuals);
1815 if (MAY_HAVE_DEBUG_BIND_STMTS)
1817 gcc_assert (!adjust_vec.exists ());
1818 adjust_vec.create (32);
1820 initialize_original_copy_tables ();
1822 /* Prolog loop may be skipped. */
1823 bool skip_prolog = (prolog_peeling != 0);
1824 /* Skip to epilog if the scalar loop may be preferred. This is only
1825 needed when we peel for the epilog loop and when the condition
1826 hasn't already been checked by loop versioning. */
1827 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1828 && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
1829 /* Epilog loop must be executed if the number of iterations for epilog
1830 loop is known at compile time; otherwise we need to add a check at
1831 the end of vector loop and skip to the end of epilog loop. */
1832 bool skip_epilog = (prolog_peeling < 0
1833 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1834 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1835 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1836 skip_epilog = false;
1838 /* Record the anchor bb at which the guard should be placed if the
1839 scalar loop may be preferred. */
1840 basic_block anchor = loop_preheader_edge (loop)->src;
1841 if (skip_vector)
1843 split_edge (loop_preheader_edge (loop));
1845 /* Due to the order in which we peel prolog and epilog, we first
1846 propagate probability to the whole loop. The purpose is to
1847 avoid adjusting probabilities of both prolog and vector loops
1848 separately. Note in this case, the probability of epilog loop
1849 needs to be scaled back later. */
1850 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1851 if (prob_vector.initialized_p ())
1852 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1853 scale_loop_profile (loop, prob_vector, bound);
1856 tree niters_prolog = build_int_cst (type, 0);
1857 source_location loop_loc = find_loop_location (loop);
1858 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1859 if (prolog_peeling)
1861 e = loop_preheader_edge (loop);
1862 if (!slpeel_can_duplicate_loop_p (loop, e))
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1865 "loop can't be duplicated to preheader edge.\n");
1866 gcc_unreachable ();
1868 /* Peel prolog and put it on preheader edge of loop. */
1869 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1870 if (!prolog)
1872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1873 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1874 gcc_unreachable ();
1876 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1877 first_loop = prolog;
1878 reset_original_copy_tables ();
1880 /* Generate and update the number of iterations for prolog loop. */
1881 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1882 &bound_prolog);
1883 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
1884 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog, step_prolog,
1885 NULL_TREE, false);
1887 /* Skip the prolog loop. */
1888 if (skip_prolog)
1890 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1891 niters_prolog, build_int_cst (type, 0));
1892 guard_bb = loop_preheader_edge (prolog)->src;
1893 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1894 guard_to = split_edge (loop_preheader_edge (loop));
1895 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1896 guard_to, guard_bb,
1897 prob_prolog.invert (),
1898 irred_flag);
1899 e = EDGE_PRED (guard_to, 0);
1900 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1901 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1903 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1904 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1906 /* Update init address of DRs. */
1907 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1908 /* Update niters for vector loop. */
1909 LOOP_VINFO_NITERS (loop_vinfo)
1910 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1911 LOOP_VINFO_NITERSM1 (loop_vinfo)
1912 = fold_build2 (MINUS_EXPR, type,
1913 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1914 bool new_var_p = false;
1915 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1916 /* It's guaranteed that the vector loop bound before vectorization is at
1917 least VF, so set range information for the newly generated var. */
1918 if (new_var_p)
1919 set_range_info (niters, VR_RANGE,
1920 wi::to_wide (build_int_cst (type, vf)),
1921 wi::to_wide (TYPE_MAX_VALUE (type)));
1923 /* Prolog iterates at most bound_prolog times; its latch iterates at
1924 most bound_prolog - 1 times. */
1925 record_niter_bound (prolog, bound_prolog - 1, false, true);
1926 delete_update_ssa ();
1927 adjust_vec_debug_stmts ();
1928 scev_reset ();
1931 if (epilog_peeling)
1933 e = single_exit (loop);
1934 if (!slpeel_can_duplicate_loop_p (loop, e))
1936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1937 "loop can't be duplicated to exit edge.\n");
1938 gcc_unreachable ();
1940 /* Peel epilog and put it on exit edge of loop. */
1941 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1942 if (!epilog)
1944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1945 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1946 gcc_unreachable ();
1948 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1950 /* The scalar version of the loop may be preferred. In this case, add
1951 the guard and skip to epilog. Note this only happens when the number
1952 of iterations of the loop is unknown at compile time; otherwise the
1953 loop wouldn't have been vectorized. */
1954 if (skip_vector)
1956 /* An additional epilogue iteration is peeled if a gap exists. */
1957 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1958 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1959 bound_prolog,
1960 peel_for_gaps ? vf : vf - 1,
1961 th, &bound_scalar,
1962 check_profitability);
1963 /* Build guard against NITERSM1 since NITERS may overflow. */
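/* (NITERSM1 cannot wrap even when NITERS, i.e. NITERSM1 + 1, would
   wrap to zero for a loop iterating over the full range of its type,
   hence the comparison below is done on NITERSM1.) */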
1964 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1965 guard_bb = anchor;
1966 guard_to = split_edge (loop_preheader_edge (epilog));
1967 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1968 guard_to, guard_bb,
1969 prob_vector.invert (),
1970 irred_flag);
1971 e = EDGE_PRED (guard_to, 0);
1972 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1973 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1975 /* Simply propagate profile info from guard_bb to guard_to, which is
1976 a merge point of control flow. */
1977 guard_to->count = guard_bb->count;
1979 /* Scale probability of epilog loop back.
1980 FIXME: We should avoid scaling down and back up. Profile may
1981 get lost if we scale down to 0. */
1982 basic_block *bbs = get_loop_body (epilog);
1983 for (unsigned int i = 0; i < epilog->num_nodes; i++)
1984 bbs[i]->count = bbs[i]->count.apply_scale
1985 (bbs[i]->count,
1986 bbs[i]->count.apply_probability
1987 (prob_vector));
1988 free (bbs);
1991 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1992 tree niters_vector_mult_vf;
1993 /* If the loop is peeled a non-zero constant number of times, niters now
1994 refers to orig_niters - prolog_peeling; it won't overflow even if
1995 orig_niters overflows. */
1996 niters_no_overflow |= (prolog_peeling > 0);
1997 vect_gen_vector_loop_niters (loop_vinfo, niters,
1998 niters_vector, step_vector,
1999 niters_no_overflow);
2000 if (!integer_onep (*step_vector))
2002 /* On exit from the loop we will have an easy way of calculating
2003 NITERS_VECTOR / STEP * STEP. Install a dummy definition
2004 until then. */
2005 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
2006 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
2007 *niters_vector_mult_vf_var = niters_vector_mult_vf;
2009 else
2010 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2011 &niters_vector_mult_vf);
2012 /* Update IVs of original loop as if they were advanced by
2013 niters_vector_mult_vf steps. */
2014 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2015 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
2016 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2017 update_e);
2019 if (skip_epilog)
2021 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2022 niters, niters_vector_mult_vf);
2023 guard_bb = single_exit (loop)->dest;
2024 guard_to = split_edge (single_exit (epilog));
2025 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
2026 skip_vector ? anchor : guard_bb,
2027 prob_epilog.invert (),
2028 irred_flag);
2029 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
2030 single_exit (epilog));
2031 /* Only need to handle basic block before epilog loop if it's not
2032 the guard_bb, which is the case when skip_vector is true. */
2033 if (guard_bb != bb_before_epilog)
2035 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
2037 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
2039 scale_loop_profile (epilog, prob_epilog, bound);
2041 else
2042 slpeel_update_phi_nodes_for_lcssa (epilog);
2044 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
2045 /* We share epilog loop with scalar version loop. */
2046 bound = MAX (bound, bound_scalar - 1);
2047 record_niter_bound (epilog, bound, false, true);
2049 delete_update_ssa ();
2050 adjust_vec_debug_stmts ();
2051 scev_reset ();
2053 adjust_vec.release ();
2054 free_original_copy_tables ();
2056 return epilog;
2059 /* Function vect_create_cond_for_niters_checks.
2061 Create a conditional expression that represents the run-time checks for
2062 the loop's niter. The loop is guaranteed to terminate if the run-time
2063 checks hold.
2065 Input:
2066 COND_EXPR - input conditional expression. The new condition will be
2067 chained with it by a logical AND operation. If it is NULL,
2068 the new condition is used on its own.
2069 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the
2070 assumptions to be checked.
2072 Output:
2073 COND_EXPR - conditional expression.
2075 The returned COND_EXPR is the conditional expression to be used in the
2076 if statement that controls which version of the loop gets executed at
2077 runtime. */
2079 static void
2080 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2082 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2084 if (*cond_expr)
2085 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2086 *cond_expr, part_cond_expr);
2087 else
2088 *cond_expr = part_cond_expr;
2091 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2092 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
2094 static void
2095 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
2097 if (*cond_expr)
2098 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2099 *cond_expr, part_cond_expr);
2100 else
2101 *cond_expr = part_cond_expr;
2104 /* Function vect_create_cond_for_align_checks.
2106 Create a conditional expression that represents the alignment checks
2107 for all data references (array element references) whose alignment
2108 must be checked at runtime.
2110 Input:
2111 COND_EXPR - input conditional expression. New conditions will be chained
2112 with logical AND operation.
2113 LOOP_VINFO - two fields of the loop information are used.
2114 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2115 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2117 Output:
2118 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2119 expression.
2120 The returned value is the conditional expression to be used in the if
2121 statement that controls which version of the loop gets executed at runtime.
2123 The algorithm makes two assumptions:
2124 1) The number of bytes "n" in a vector is a power of 2.
2125 2) An address "a" is aligned if a%n is zero, and this
2126 test can be done as a&(n-1) == 0. For example, for 16
2127 byte vectors the test is a&0xf == 0. */
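/* For instance, with 16-byte vectors (mask = 0xf) and two data
   references whose first-vector addresses are A and B, the emitted
   test is equivalent to this sketch (names illustrative only):

     or_tmp = (intptr_t) A | (intptr_t) B;
     ok = (or_tmp & 0xf) == 0;

   OR-ing the addresses first allows all of them to be tested with a
   single AND against the mask.  */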
2129 static void
2130 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2131 tree *cond_expr,
2132 gimple_seq *cond_expr_stmt_list)
2134 vec<gimple *> may_misalign_stmts
2135 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2136 gimple *ref_stmt;
2137 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2138 tree mask_cst;
2139 unsigned int i;
2140 tree int_ptrsize_type;
2141 char tmp_name[20];
2142 tree or_tmp_name = NULL_TREE;
2143 tree and_tmp_name;
2144 gimple *and_stmt;
2145 tree ptrsize_zero;
2146 tree part_cond_expr;
2148 /* Check that mask is one less than a power of 2, i.e., mask is
2149 all zeros followed by all ones. */
2150 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2152 int_ptrsize_type = signed_type_for (ptr_type_node);
2154 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2155 of the first vector of the i'th data reference. */
2157 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2159 gimple_seq new_stmt_list = NULL;
2160 tree addr_base;
2161 tree addr_tmp_name;
2162 tree new_or_tmp_name;
2163 gimple *addr_stmt, *or_stmt;
2164 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2165 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2166 bool negative = tree_int_cst_compare
2167 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2168 tree offset = negative
2169 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2171 /* create: addr_tmp = (int)(address_of_first_vector) */
2172 addr_base =
2173 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2174 offset);
2175 if (new_stmt_list != NULL)
2176 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2178 sprintf (tmp_name, "addr2int%d", i);
2179 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2180 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2181 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2183 /* The addresses are ORed together. */
2185 if (or_tmp_name != NULL_TREE)
2187 /* create: or_tmp = or_tmp | addr_tmp */
2188 sprintf (tmp_name, "orptrs%d", i);
2189 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2190 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2191 or_tmp_name, addr_tmp_name);
2192 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2193 or_tmp_name = new_or_tmp_name;
2195 else
2196 or_tmp_name = addr_tmp_name;
2198 } /* end for i */
2200 mask_cst = build_int_cst (int_ptrsize_type, mask);
2202 /* create: and_tmp = or_tmp & mask */
2203 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2205 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2206 or_tmp_name, mask_cst);
2207 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2209 /* Make and_tmp the left operand of the conditional test against zero.
2210 If and_tmp has a nonzero bit, then some address is unaligned. */
2211 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2212 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2213 and_tmp_name, ptrsize_zero);
2214 chain_cond_expr (cond_expr, part_cond_expr);
2217 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2218 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2219 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2220 and this new condition are true. Treat a null *COND_EXPR as "true". */
2222 static void
2223 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2225 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2226 unsigned int i;
2227 vec_object_pair *pair;
2228 FOR_EACH_VEC_ELT (pairs, i, pair)
2230 tree addr1 = build_fold_addr_expr (pair->first);
2231 tree addr2 = build_fold_addr_expr (pair->second);
2232 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2233 addr1, addr2);
2234 chain_cond_expr (cond_expr, part_cond_expr);
2238 /* Function vect_create_cond_for_alias_checks.
2240 Create a conditional expression that represents the run-time checks for
2241 overlapping of address ranges represented by a list of data reference
2242 relations passed as input.
2244 Input:
2245 COND_EXPR - input conditional expression. The new conditions will be
2246 chained with it by a logical AND operation. If it is NULL,
2247 the new conditions are used on their own.
2248 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddrs
2249 to be checked.
2251 Output:
2252 COND_EXPR - conditional expression.
2254 The returned COND_EXPR is the conditional expression to be used in the if
2255 statement that controls which version of the loop gets executed at runtime. */
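/* For two data references covering segments that start at addresses A
   and B and span LEN_A and LEN_B bytes over the whole loop, the check
   built by create_runtime_alias_checks is roughly of the form (sketch
   only; the real expression also accounts for access direction and
   sizes):

     no_alias = (A + LEN_A <= B) || (B + LEN_B <= A);

   i.e. the two segments do not overlap.  */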
2258 void
2259 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2261 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2262 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2264 if (comp_alias_ddrs.is_empty ())
2265 return;
2267 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2268 &comp_alias_ddrs, cond_expr);
2269 if (dump_enabled_p ())
2270 dump_printf_loc (MSG_NOTE, vect_location,
2271 "created %u versioning for alias checks.\n",
2272 comp_alias_ddrs.length ());
2276 /* Function vect_loop_versioning.
2278 If the loop has data references that may or may not be aligned and/or
2279 has data reference relations whose independence was not proven, then
2280 two versions of the loop need to be generated, one which is vectorized
2281 and one which isn't. A test is then generated to control which of the
2282 loops is executed. The test checks for the alignment of all of the
2283 data references that may or may not be aligned. An additional
2284 sequence of runtime tests is generated for each pair of DDRs whose
2285 independence was not proven. The vectorized version of the loop is
2286 executed only if both the alias and alignment tests pass.
2288 The test generated to check which version of the loop is executed
2289 is modified to also check for profitability as indicated by the
2290 cost model threshold TH.
2292 The versioning precondition(s) are placed in *COND_EXPR and
2293 *COND_EXPR_STMT_LIST. */
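/* At the source level the transformation is roughly (sketch only,
   assuming all three kinds of checks are needed):

     if (niters >= TH && all_aligned && no_alias)
       VECTORIZED LOOP
     else
       SCALAR LOOP
 */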
2295 void
2296 vect_loop_versioning (loop_vec_info loop_vinfo,
2297 unsigned int th, bool check_profitability,
2298 poly_uint64 versioning_threshold)
2300 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2301 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2302 basic_block condition_bb;
2303 gphi_iterator gsi;
2304 gimple_stmt_iterator cond_exp_gsi;
2305 basic_block merge_bb;
2306 basic_block new_exit_bb;
2307 edge new_exit_e, e;
2308 gphi *orig_phi, *new_phi;
2309 tree cond_expr = NULL_TREE;
2310 gimple_seq cond_expr_stmt_list = NULL;
2311 tree arg;
2312 profile_probability prob = profile_probability::likely ();
2313 gimple_seq gimplify_stmt_list = NULL;
2314 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2315 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2316 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2317 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2319 if (check_profitability)
2320 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2321 build_int_cst (TREE_TYPE (scalar_loop_iters),
2322 th - 1));
2323 if (maybe_ne (versioning_threshold, 0U))
2325 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2326 build_int_cst (TREE_TYPE (scalar_loop_iters),
2327 versioning_threshold - 1));
2328 if (cond_expr)
2329 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
2330 expr, cond_expr);
2331 else
2332 cond_expr = expr;
2335 if (version_niter)
2336 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2338 if (cond_expr)
2339 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2340 is_gimple_condexpr, NULL_TREE);
2342 if (version_align)
2343 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2344 &cond_expr_stmt_list);
2346 if (version_alias)
2348 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
2349 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2352 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2353 is_gimple_condexpr, NULL_TREE);
2354 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2356 initialize_original_copy_tables ();
2357 if (scalar_loop)
2359 edge scalar_e;
2360 basic_block preheader, scalar_preheader;
2362 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2363 scale LOOP's frequencies instead. */
2364 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2365 prob, prob.invert (), prob, prob.invert (), true);
2366 scale_loop_frequencies (loop, prob);
2367 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2368 while we need to move it above LOOP's preheader. */
2369 e = loop_preheader_edge (loop);
2370 scalar_e = loop_preheader_edge (scalar_loop);
2371 gcc_assert (empty_block_p (e->src)
2372 && single_pred_p (e->src));
2373 gcc_assert (empty_block_p (scalar_e->src)
2374 && single_pred_p (scalar_e->src));
2375 gcc_assert (single_pred_p (condition_bb));
2376 preheader = e->src;
2377 scalar_preheader = scalar_e->src;
2378 scalar_e = find_edge (condition_bb, scalar_preheader);
2379 e = single_pred_edge (preheader);
2380 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2381 scalar_preheader);
2382 redirect_edge_and_branch_force (scalar_e, preheader);
2383 redirect_edge_and_branch_force (e, condition_bb);
2384 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2385 single_pred (condition_bb));
2386 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2387 single_pred (scalar_preheader));
2388 set_immediate_dominator (CDI_DOMINATORS, preheader,
2389 condition_bb);
2391 else
2392 nloop = loop_version (loop, cond_expr, &condition_bb,
2393 prob, prob.invert (), prob, prob.invert (), true);
2395 if (version_niter)
2397 /* The versioned loop could be infinite, so we need to clear existing
2398 niter information, which was copied from the original loop. */
2399 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2400 vect_free_loop_info_assumptions (nloop);
2401 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2402 loop_constraint_set (loop, LOOP_C_INFINITE);
2405 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2406 && dump_enabled_p ())
2408 if (version_alias)
2409 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2410 "loop versioned for vectorization because of "
2411 "possible aliasing\n");
2412 if (version_align)
2413 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2414 "loop versioned for vectorization to enhance "
2415 "alignment\n");
2418 free_original_copy_tables ();
2420 /* Loop versioning violates an assumption we try to maintain during
2421 vectorization - that the loop exit block has a single predecessor.
2422 After versioning, the exit block of both loop versions is the same
2423 basic block (i.e. it has two predecessors). To simplify the
2424 following transformations in the vectorizer, we fix this situation
2425 here by adding a new (empty) block on the exit-edge of the loop,
2426 with the proper loop-exit phis to maintain loop-closed-form.
2427 If loop versioning wasn't done from loop, but scalar_loop instead,
2428 merge_bb will already have just a single predecessor. */
2430 merge_bb = single_exit (loop)->dest;
2431 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2433 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2434 new_exit_bb = split_edge (single_exit (loop));
2435 new_exit_e = single_exit (loop);
2436 e = EDGE_SUCC (new_exit_bb, 0);
2438 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2440 tree new_res;
2441 orig_phi = gsi.phi ();
2442 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2443 new_phi = create_phi_node (new_res, new_exit_bb);
2444 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2445 add_phi_arg (new_phi, arg, new_exit_e,
2446 gimple_phi_arg_location_from_edge (orig_phi, e));
2447 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2451 /* End loop-exit-fixes after versioning. */
2453 if (cond_expr_stmt_list)
2455 cond_exp_gsi = gsi_last_bb (condition_bb);
2456 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2457 GSI_SAME_STMT);
2459 update_ssa (TODO_update_ssa);