1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src)
106 && (!rename_from_outer_loop || e->src != outer_loop->header))
107 continue;
108 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
109 gsi_next (&gsi))
110 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
115 struct adjust_info
117 tree from, to;
118 basic_block bb;
121 /* A stack of values to be adjusted in debug stmts. We have to
122 process them LIFO, so that the closest substitution applies. If we
123 processed them FIFO, without the stack, we might substitute uses
124 with a PHI DEF that would soon become non-dominant, and when we got
125 to the suitable one, it wouldn't have anything to substitute any
126 more. */
127 static vec<adjust_info, va_heap> adjust_vec;
129 /* Adjust any debug stmts that referenced AI->from values to use the
130 loop-closed AI->to, if the references are dominated by AI->bb and
131 not by the definition of AI->from. */
133 static void
134 adjust_debug_stmts_now (adjust_info *ai)
136 basic_block bbphi = ai->bb;
137 tree orig_def = ai->from;
138 tree new_def = ai->to;
139 imm_use_iterator imm_iter;
140 gimple *stmt;
141 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
143 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
145 /* Adjust any debug stmts that held onto non-loop-closed
146 references. */
147 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
149 use_operand_p use_p;
150 basic_block bbuse;
152 if (!is_gimple_debug (stmt))
153 continue;
155 gcc_assert (gimple_debug_bind_p (stmt));
157 bbuse = gimple_bb (stmt);
159 if ((bbuse == bbphi
160 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
161 && !(bbuse == bbdef
162 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
164 if (new_def)
165 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
166 SET_USE (use_p, new_def);
167 else
169 gimple_debug_bind_reset_value (stmt);
170 update_stmt (stmt);
176 /* Adjust debug stmts as scheduled before. */
178 static void
179 adjust_vec_debug_stmts (void)
181 if (!MAY_HAVE_DEBUG_STMTS)
182 return;
184 gcc_assert (adjust_vec.exists ());
186 while (!adjust_vec.is_empty ())
188 adjust_debug_stmts_now (&adjust_vec.last ());
189 adjust_vec.pop ();
193 /* Adjust any debug stmts that referenced FROM values to use the
194 loop-closed TO, if the references are dominated by BB and not by
195 the definition of FROM. If adjust_vec is non-NULL, adjustments
196 will be postponed until adjust_vec_debug_stmts is called. */
198 static void
199 adjust_debug_stmts (tree from, tree to, basic_block bb)
201 adjust_info ai;
203 if (MAY_HAVE_DEBUG_STMTS
204 && TREE_CODE (from) == SSA_NAME
205 && ! SSA_NAME_IS_DEFAULT_DEF (from)
206 && ! virtual_operand_p (from))
208 ai.from = from;
209 ai.to = to;
210 ai.bb = bb;
212 if (adjust_vec.exists ())
213 adjust_vec.safe_push (ai);
214 else
215 adjust_debug_stmts_now (&ai);
219 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
220 to adjust any debug stmts that referenced the old phi arg,
221 presumably non-loop-closed references left over from other
222 transformations. */
224 static void
225 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
227 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
229 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
231 if (MAY_HAVE_DEBUG_STMTS)
232 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
233 gimple_bb (update_phi));
236 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
237 that starts at zero, increases by one, and whose limit is NITERS.
239 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
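/* For example, a rough sketch of the generated form (hypothetical SSA names,
   assuming the exit edge is the "true" edge of the condition):

     ivtmp_1 = PHI <0 (preheader), ivtmp_2 (latch)>
     ... loop body ...
     ivtmp_2 = ivtmp_1 + 1;
     if (ivtmp_2 >= niters)
       goto exit;
     else
       goto latch;

   so the loop body executes exactly NITERS times.  */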
241 void
242 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
244 tree indx_before_incr, indx_after_incr;
245 gcond *cond_stmt;
246 gcond *orig_cond;
247 edge exit_edge = single_exit (loop);
248 gimple_stmt_iterator loop_cond_gsi;
249 gimple_stmt_iterator incr_gsi;
250 bool insert_after;
251 tree init = build_int_cst (TREE_TYPE (niters), 0);
252 tree step = build_int_cst (TREE_TYPE (niters), 1);
253 source_location loop_loc;
254 enum tree_code code;
256 orig_cond = get_loop_exit_condition (loop);
257 gcc_assert (orig_cond);
258 loop_cond_gsi = gsi_for_stmt (orig_cond);
260 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
261 create_iv (init, step, NULL_TREE, loop,
262 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
264 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
265 true, NULL_TREE, true,
266 GSI_SAME_STMT);
267 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
268 true, GSI_SAME_STMT);
270 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
271 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
272 NULL_TREE);
274 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
276 /* Remove old loop exit test: */
277 gsi_remove (&loop_cond_gsi, true);
278 free_stmt_vec_info (orig_cond);
280 loop_loc = find_loop_location (loop);
281 if (dump_enabled_p ())
283 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
284 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
285 LOCATION_LINE (loop_loc));
286 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
288 loop->nb_iterations = niters;
291 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
292 For all PHI arguments in FROM->dest and TO->dest coming from those
293 edges, ensure that the current_def of each TO->dest PHI argument is
294 set to that of the corresponding FROM->dest argument. */
296 static void
297 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
299 gimple_stmt_iterator gsi_from, gsi_to;
301 for (gsi_from = gsi_start_phis (from->dest),
302 gsi_to = gsi_start_phis (to->dest);
303 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
305 gimple *from_phi = gsi_stmt (gsi_from);
306 gimple *to_phi = gsi_stmt (gsi_to);
307 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
308 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
309 if (virtual_operand_p (from_arg))
311 gsi_next (&gsi_from);
312 continue;
314 if (virtual_operand_p (to_arg))
316 gsi_next (&gsi_to);
317 continue;
319 if (TREE_CODE (from_arg) != SSA_NAME)
320 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
321 else
323 if (get_current_def (to_arg) == NULL_TREE)
324 set_current_def (to_arg, get_current_def (from_arg));
326 gsi_next (&gsi_from);
327 gsi_next (&gsi_to);
330 gphi *from_phi = get_virtual_phi (from->dest);
331 gphi *to_phi = get_virtual_phi (to->dest);
332 if (from_phi)
333 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
334 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
338 /* Given LOOP this function generates a new copy of it and puts it
339 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
340 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
341 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
342 entry or exit of LOOP. */
344 struct loop *
345 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
346 struct loop *scalar_loop, edge e)
348 struct loop *new_loop;
349 basic_block *new_bbs, *bbs, *pbbs;
350 bool at_exit;
351 bool was_imm_dom;
352 basic_block exit_dest;
353 edge exit, new_exit;
354 bool duplicate_outer_loop = false;
356 exit = single_exit (loop);
357 at_exit = (e == exit);
358 if (!at_exit && e != loop_preheader_edge (loop))
359 return NULL;
361 if (scalar_loop == NULL)
362 scalar_loop = loop;
364 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
365 pbbs = bbs + 1;
366 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
367 /* Allow duplication of outer loops. */
368 if (scalar_loop->inner)
369 duplicate_outer_loop = true;
370 /* Check whether duplication is possible. */
371 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
373 free (bbs);
374 return NULL;
377 /* Generate new loop structure. */
378 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
379 duplicate_subloops (scalar_loop, new_loop);
381 exit_dest = exit->dest;
382 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
383 exit_dest) == loop->header ?
384 true : false);
386 /* Also copy the pre-header, this avoids jumping through hoops to
387 duplicate the loop entry PHI arguments. Create an empty
388 pre-header unconditionally for this. */
389 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
390 edge entry_e = single_pred_edge (preheader);
391 bbs[0] = preheader;
392 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
394 exit = single_exit (scalar_loop);
395 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
396 &exit, 1, &new_exit, NULL,
397 at_exit ? loop->latch : e->src, true);
398 exit = single_exit (loop);
399 basic_block new_preheader = new_bbs[0];
401 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
403 if (scalar_loop != loop)
405 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
406 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
407 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
408 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
409 header) to have current_def set, so copy them over. */
410 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
411 exit);
412 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
414 EDGE_SUCC (loop->latch, 0));
417 if (at_exit) /* Add the loop copy at exit. */
419 if (scalar_loop != loop)
421 gphi_iterator gsi;
422 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
424 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
425 gsi_next (&gsi))
427 gphi *phi = gsi.phi ();
428 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
429 location_t orig_locus
430 = gimple_phi_arg_location_from_edge (phi, e);
432 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
435 redirect_edge_and_branch_force (e, new_preheader);
436 flush_pending_stmts (e);
437 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
438 if (was_imm_dom || duplicate_outer_loop)
439 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
441 /* And remove the unnecessary forwarder again. Keep the other
442 one so we have a proper pre-header for the loop at the exit edge. */
443 redirect_edge_pred (single_succ_edge (preheader),
444 single_pred (preheader));
445 delete_basic_block (preheader);
446 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
447 loop_preheader_edge (scalar_loop)->src);
449 else /* Add the copy at entry. */
451 if (scalar_loop != loop)
453 /* Remove the unnecessary forwarder of scalar_loop again. */
454 redirect_edge_pred (single_succ_edge (preheader),
455 single_pred (preheader));
456 delete_basic_block (preheader);
457 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
458 loop_preheader_edge (scalar_loop)->src);
459 preheader = split_edge (loop_preheader_edge (loop));
460 entry_e = single_pred_edge (preheader);
463 redirect_edge_and_branch_force (entry_e, new_preheader);
464 flush_pending_stmts (entry_e);
465 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
467 redirect_edge_and_branch_force (new_exit, preheader);
468 flush_pending_stmts (new_exit);
469 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
471 /* And remove the unnecessary forwarder again. Keep the other
472 one so we have a proper pre-header for the loop at the exit edge. */
473 redirect_edge_pred (single_succ_edge (new_preheader),
474 single_pred (new_preheader));
475 delete_basic_block (new_preheader);
476 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
477 loop_preheader_edge (new_loop)->src);
480 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
481 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
483 if (scalar_loop != loop)
485 /* Update new_loop->header PHIs, so that on the preheader
486 edge they are the ones from loop rather than scalar_loop. */
487 gphi_iterator gsi_orig, gsi_new;
488 edge orig_e = loop_preheader_edge (loop);
489 edge new_e = loop_preheader_edge (new_loop);
491 for (gsi_orig = gsi_start_phis (loop->header),
492 gsi_new = gsi_start_phis (new_loop->header);
493 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
494 gsi_next (&gsi_orig), gsi_next (&gsi_new))
496 gphi *orig_phi = gsi_orig.phi ();
497 gphi *new_phi = gsi_new.phi ();
498 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
499 location_t orig_locus
500 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
502 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
506 free (new_bbs);
507 free (bbs);
509 checking_verify_dominators (CDI_DOMINATORS);
511 return new_loop;
515 /* Given the condition expression COND, put it as the last statement of
516 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
517 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
518 skip the loop. PROBABILITY is the skip edge's probability. */
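/* As an illustration (not a literal dump of the generated IL): after the
   call, GUARD_BB ends in "if (COND)"; the new true edge (the returned skip
   edge) jumps to GUARD_TO with the given PROBABILITY, while the pre-existing
   successor edge becomes the false edge and keeps the remaining probability
   and count.  */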
520 static edge
521 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
522 basic_block guard_to, basic_block dom_bb,
523 int probability)
525 gimple_stmt_iterator gsi;
526 edge new_e, enter_e;
527 gcond *cond_stmt;
528 gimple_seq gimplify_stmt_list = NULL;
530 enter_e = EDGE_SUCC (guard_bb, 0);
531 enter_e->flags &= ~EDGE_FALLTHRU;
532 enter_e->flags |= EDGE_FALSE_VALUE;
533 gsi = gsi_last_bb (guard_bb);
535 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
536 NULL_TREE);
537 if (gimplify_stmt_list)
538 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
540 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
541 gsi = gsi_last_bb (guard_bb);
542 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
544 /* Add new edge to connect guard block to the merge/loop-exit block. */
545 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
547 new_e->count = guard_bb->count;
548 new_e->probability = probability;
549 new_e->count = apply_probability (enter_e->count, probability);
550 enter_e->count -= new_e->count;
551 enter_e->probability = inverse_probability (probability);
552 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
553 return new_e;
557 /* This function verifies that the following restrictions hold for LOOP:
558 (1) it consists of exactly 2 basic blocks (a header and an empty latch)
559 for an innermost loop, and of 5 basic blocks for an outer loop.
560 (2) it is single entry, single exit
561 (3) its exit condition is the last stmt in the header
562 (4) E is the entry/exit edge of LOOP. */
565 bool
566 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
568 edge exit_e = single_exit (loop);
569 edge entry_e = loop_preheader_edge (loop);
570 gcond *orig_cond = get_loop_exit_condition (loop);
571 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
572 unsigned int num_bb = loop->inner? 5 : 2;
574 /* All loops have an outer scope; the only case in which loop->outer is NULL
575 is for the function itself. */
576 if (!loop_outer (loop)
577 || loop->num_nodes != num_bb
578 || !empty_block_p (loop->latch)
579 || !single_exit (loop)
580 /* Verify that new loop exit condition can be trivially modified. */
581 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
582 || (e != exit_e && e != entry_e))
583 return false;
585 return true;
588 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
589 in the exit bb and rename all the uses after the loop. This simplifies
590 the *guard[12] routines, which assume loop closed SSA form for all PHIs
591 (but normally loop closed SSA form doesn't require virtual PHIs to be
592 in the same form). Doing this early simplifies checking which
593 uses should be renamed. */
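/* For example (hypothetical SSA names): if the loop header has a virtual
   PHI .MEM_1 = PHI <.MEM_0 (preheader), .MEM_2 (latch)> but the exit bb has
   none, we create .MEM_3 = PHI <.MEM_2 (exit)> in the exit bb and rewrite
   every use of .MEM_2 that is outside the loop to use .MEM_3.  */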
595 static void
596 create_lcssa_for_virtual_phi (struct loop *loop)
598 gphi_iterator gsi;
599 edge exit_e = single_exit (loop);
601 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
602 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
604 gphi *phi = gsi.phi ();
605 for (gsi = gsi_start_phis (exit_e->dest);
606 !gsi_end_p (gsi); gsi_next (&gsi))
607 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
608 break;
609 if (gsi_end_p (gsi))
611 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
612 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
613 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
614 imm_use_iterator imm_iter;
615 gimple *stmt;
616 use_operand_p use_p;
618 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
619 gimple_phi_set_result (new_phi, new_vop);
620 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
621 if (stmt != new_phi
622 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
623 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
624 SET_USE (use_p, new_vop);
626 break;
631 /* Function find_loop_location.
633 Extract the location of the loop in the source code.
634 If the loop is not well formed for vectorization, an estimated
635 location is calculated.
636 Return the loop location on success and UNKNOWN_LOCATION otherwise. */
638 source_location
639 find_loop_location (struct loop *loop)
641 gimple *stmt = NULL;
642 basic_block bb;
643 gimple_stmt_iterator si;
645 if (!loop)
646 return UNKNOWN_LOCATION;
648 stmt = get_loop_exit_condition (loop);
650 if (stmt
651 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
652 return gimple_location (stmt);
654 /* If we got here the loop is probably not "well formed";
655 try to estimate the loop location. */
657 if (!loop->header)
658 return UNKNOWN_LOCATION;
660 bb = loop->header;
662 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
664 stmt = gsi_stmt (si);
665 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
666 return gimple_location (stmt);
669 return UNKNOWN_LOCATION;
672 /* Return true if PHI defines an IV of the loop to be vectorized. */
674 static bool
675 iv_phi_p (gphi *phi)
677 if (virtual_operand_p (PHI_RESULT (phi)))
678 return false;
680 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
681 gcc_assert (stmt_info != NULL);
682 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
683 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
684 return false;
686 return true;
689 /* Function vect_can_advance_ivs_p
691 In case the number of iterations that LOOP iterates is unknown at compile
692 time, an epilog loop will be generated, and the loop induction variables
693 (IVs) will be "advanced" to the value they are supposed to take just before
694 the epilog loop. Here we check that the access function of the loop IVs
695 and the expression that represents the loop bound are simple enough.
696 These restrictions will be relaxed in the future. */
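/* For example (hypothetical SCEVs): an IV with access function {0, +, 4}_1
   has evolution part 4, which is loop invariant and not a chrec itself, so
   it can be advanced; an IV with access function {0, +, {1, +, 1}_1}_1
   (a degree-2 polynomial) is rejected.  */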
698 bool
699 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
701 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
702 basic_block bb = loop->header;
703 gphi_iterator gsi;
705 /* Analyze phi functions of the loop header. */
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
709 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
711 tree evolution_part;
713 gphi *phi = gsi.phi ();
714 if (dump_enabled_p ())
716 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
717 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
720 /* Skip virtual phi's. The data dependences that are associated with
721 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
723 Skip reduction phis. */
724 if (!iv_phi_p (phi))
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE, vect_location,
728 "reduc or virtual phi. skip.\n");
729 continue;
732 /* Analyze the evolution function. */
734 evolution_part
735 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
736 if (evolution_part == NULL_TREE)
738 if (dump_enabled_p ())
739 dump_printf (MSG_MISSED_OPTIMIZATION,
740 "No access function or evolution.\n");
741 return false;
744 /* FORNOW: We do not transform initial conditions of IVs
745 whose evolution functions are not invariant in the loop. */
747 if (!expr_invariant_in_loop_p (loop, evolution_part))
749 if (dump_enabled_p ())
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
751 "evolution not invariant in loop.\n");
752 return false;
755 /* FORNOW: We do not transform initial conditions of IVs
756 whose evolution function is a polynomial of degree >= 2. */
758 if (tree_is_chrec (evolution_part))
760 if (dump_enabled_p ())
761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
762 "evolution is chrec.\n");
763 return false;
767 return true;
771 /* Function vect_update_ivs_after_vectorizer.
773 "Advance" the induction variables of LOOP to the value they should take
774 after the execution of LOOP. This is currently necessary because the
775 vectorizer does not handle induction variables that are used after the
776 loop. Such a situation occurs when the last iterations of LOOP are
777 peeled, because:
778 1. We introduced new uses after LOOP for IVs that were not originally used
779 after LOOP: the IVs of LOOP are now used by an epilog loop.
780 2. LOOP is going to be vectorized; this means that it will iterate N/VF
781 times, whereas the loop IVs should be bumped N times.
783 Input:
784 - LOOP - a loop that is going to be vectorized. The last few iterations
785 of LOOP were peeled.
786 - NITERS - the number of iterations that LOOP executes (before it is
787 vectorized). i.e, the number of times the ivs should be bumped.
788 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
789 coming out from LOOP on which there are uses of the LOOP ivs
790 (this is the path from LOOP->exit to epilog_loop->preheader).
792 The new definitions of the ivs are placed in LOOP->exit.
793 The phi args associated with the edge UPDATE_E in the bb
794 UPDATE_E->dest are updated accordingly.
796 Assumption 1: Like the rest of the vectorizer, this function assumes
797 a single loop exit that has a single predecessor.
799 Assumption 2: The phi nodes in the LOOP header and in update_bb are
800 organized in the same order.
802 Assumption 3: The access function of the ivs is simple enough (see
803 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
805 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
806 coming out of LOOP on which the ivs of LOOP are used (this is the path
807 that leads to the epilog loop; other paths skip the epilog loop). This
808 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
809 needs to have its phis updated.
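/* For example (hypothetical values): an integer IV with initial value 0
   and step 4 is advanced to 0 + NITERS * 4 in the exit bb, and that value
   becomes the PHI argument on UPDATE_E in place of the non-advanced
   definition.  */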
812 static void
813 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
814 tree niters, edge update_e)
816 gphi_iterator gsi, gsi1;
817 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
818 basic_block update_bb = update_e->dest;
819 basic_block exit_bb = single_exit (loop)->dest;
821 /* Make sure there exists a single-predecessor exit bb: */
822 gcc_assert (single_pred_p (exit_bb));
823 gcc_assert (single_succ_edge (exit_bb) == update_e);
825 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
826 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
827 gsi_next (&gsi), gsi_next (&gsi1))
829 tree init_expr;
830 tree step_expr, off;
831 tree type;
832 tree var, ni, ni_name;
833 gimple_stmt_iterator last_gsi;
835 gphi *phi = gsi.phi ();
836 gphi *phi1 = gsi1.phi ();
837 if (dump_enabled_p ())
839 dump_printf_loc (MSG_NOTE, vect_location,
840 "vect_update_ivs_after_vectorizer: phi: ");
841 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
844 /* Skip reduction and virtual phis. */
845 if (!iv_phi_p (phi))
847 if (dump_enabled_p ())
848 dump_printf_loc (MSG_NOTE, vect_location,
849 "reduc or virtual phi. skip.\n");
850 continue;
853 type = TREE_TYPE (gimple_phi_result (phi));
854 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
855 step_expr = unshare_expr (step_expr);
857 /* FORNOW: We do not support IVs whose evolution function is a polynomial
858 of degree >= 2 or exponential. */
859 gcc_assert (!tree_is_chrec (step_expr));
861 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
863 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
864 fold_convert (TREE_TYPE (step_expr), niters),
865 step_expr);
866 if (POINTER_TYPE_P (type))
867 ni = fold_build_pointer_plus (init_expr, off);
868 else
869 ni = fold_build2 (PLUS_EXPR, type,
870 init_expr, fold_convert (type, off));
872 var = create_tmp_var (type, "tmp");
874 last_gsi = gsi_last_bb (exit_bb);
875 gimple_seq new_stmts = NULL;
876 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
877 /* Exit_bb shouldn't be empty. */
878 if (!gsi_end_p (last_gsi))
879 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
880 else
881 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
883 /* Fix phi expressions in the successor bb. */
884 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
888 /* Function vect_gen_prolog_loop_niters
890 Generate the number of iterations which should be peeled as prolog for the
891 loop represented by LOOP_VINFO. It is calculated as the misalignment of
892 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
893 As a result, after the execution of this loop, the data reference DR will
894 refer to an aligned location. The following computation is generated:
896 If the misalignment of DR is known at compile time:
897 addr_mis = DR_MISALIGNMENT (dr);
898 Else, compute address misalignment in bytes:
899 addr_mis = addr & (vectype_align - 1)
901 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
903 (elem_size = element type size; an element is the scalar element whose type
904 is the inner type of the vectype)
906 The computations will be emitted at the end of BB. We also compute and
907 store upper bound of the result in BOUND.
909 When the step of the data-ref in the loop is not 1 (as in interleaved data
910 and SLP), the number of iterations of the prolog must be divided by the step
911 (which is equal to the size of interleaved group).
913 The above formulas assume that VF == number of elements in the vector. This
914 may not hold when there are multiple-types in the loop.
915 In this case, for some data-references in the loop the VF does not represent
916 the number of elements that fit in the vector. Therefore, instead of VF we
917 use TYPE_VECTOR_SUBPARTS. */
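/* For example (hypothetical values): for a V4SI access (4 elements of
   4 bytes, vectype_align 16) whose address is misaligned by 8 bytes,
   elem_misalign = 8 >> 2 = 2 and prolog_niters = (4 - 2) & 3 = 2, i.e. two
   scalar iterations are peeled before the access becomes aligned
   (assuming step 1).  */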
919 static tree
920 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
921 basic_block bb, int *bound)
923 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
924 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
925 tree var;
926 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
927 gimple_seq stmts = NULL, new_stmts = NULL;
928 tree iters, iters_name;
929 gimple *dr_stmt = DR_STMT (dr);
930 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
931 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
932 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
933 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
935 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
937 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
939 if (dump_enabled_p ())
940 dump_printf_loc (MSG_NOTE, vect_location,
941 "known peeling = %d.\n", npeel);
943 iters = build_int_cst (niters_type, npeel);
944 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) + 1;
946 else
948 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
949 tree offset = negative
950 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
951 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
952 &stmts, offset, loop);
953 tree type = unsigned_type_for (TREE_TYPE (start_addr));
954 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
955 HOST_WIDE_INT elem_size =
956 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
957 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
958 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
959 tree nelements_tree = build_int_cst (type, nelements);
960 tree byte_misalign;
961 tree elem_misalign;
963 /* Create: byte_misalign = addr & (vectype_align - 1) */
964 byte_misalign =
965 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
966 vectype_align_minus_1);
968 /* Create: elem_misalign = byte_misalign / element_size */
969 elem_misalign =
970 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
972 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
973 if (negative)
974 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
975 else
976 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
977 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
978 iters = fold_convert (niters_type, iters);
979 *bound = nelements;
982 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "niters for prolog loop: ");
986 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
987 dump_printf (MSG_NOTE, "\n");
990 var = create_tmp_var (niters_type, "prolog_loop_niters");
991 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
993 if (new_stmts)
994 gimple_seq_add_seq (&stmts, new_stmts);
995 if (stmts)
997 gcc_assert (single_succ_p (bb));
998 gimple_stmt_iterator gsi = gsi_last_bb (bb);
999 if (gsi_end_p (gsi))
1000 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1001 else
1002 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1004 return iters_name;
1008 /* Function vect_update_init_of_dr
1010 NITERS iterations were peeled from LOOP. DR represents a data reference
1011 in LOOP. This function updates the information recorded in DR to
1012 account for the fact that the first NITERS iterations had already been
1013 executed. Specifically, it updates the OFFSET field of DR. */
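/* For example (hypothetical values): for a data reference with DR_STEP 4
   (an int accessed once per iteration), peeling NITERS = 3 iterations adds
   3 * 4 = 12 bytes to DR_OFFSET, so the remaining loop starts at the fourth
   element.  */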
1015 static void
1016 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1018 tree offset = DR_OFFSET (dr);
1020 niters = fold_build2 (MULT_EXPR, sizetype,
1021 fold_convert (sizetype, niters),
1022 fold_convert (sizetype, DR_STEP (dr)));
1023 offset = fold_build2 (PLUS_EXPR, sizetype,
1024 fold_convert (sizetype, offset), niters);
1025 DR_OFFSET (dr) = offset;
1029 /* Function vect_update_inits_of_drs
1031 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1032 This function updates the information recorded for the data references in
1033 the loop to account for the fact that the first NITERS iterations had
1034 already been executed. Specifically, it updates the initial_condition of
1035 the access_function of all the data_references in the loop. */
1037 static void
1038 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1040 unsigned int i;
1041 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1042 struct data_reference *dr;
1044 if (dump_enabled_p ())
1045 dump_printf_loc (MSG_NOTE, vect_location,
1046 "=== vect_update_inits_of_dr ===\n");
1048 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1049 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1051 gimple_seq seq;
1052 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1053 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1055 niters = fold_convert (sizetype, niters);
1056 niters = force_gimple_operand (niters, &seq, false, var);
1057 if (seq)
1059 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1060 gcc_assert (!new_bb);
1064 FOR_EACH_VEC_ELT (datarefs, i, dr)
1065 vect_update_init_of_dr (dr, niters);
1069 /* This function builds ni_name = number of iterations. Statements
1070 are emitted on the loop preheader edge. */
1072 tree
1073 vect_build_loop_niters (loop_vec_info loop_vinfo)
1075 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1076 if (TREE_CODE (ni) == INTEGER_CST)
1077 return ni;
1078 else
1080 tree ni_name, var;
1081 gimple_seq stmts = NULL;
1082 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1084 var = create_tmp_var (TREE_TYPE (ni), "niters");
1085 ni_name = force_gimple_operand (ni, &stmts, false, var);
1086 if (stmts)
1087 gsi_insert_seq_on_edge_immediate (pe, stmts);
1089 return ni_name;
1093 /* Calculate the number of iterations under which the scalar loop will be
1094 preferred over the vectorized loop. NITERS_PROLOG is the number of
1095 iterations of the prolog loop. If it's an integer constant, the value
1096 is also passed in INT_NITERS_PROLOG. VF is the vector factor;
1097 TH is the threshold for the vectorized loop if CHECK_PROFITABILITY is
1098 true. This function also stores the upper bound of the result in BOUND. */
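/* For example (hypothetical values): with VF = 4, BOUND_PROLOG = 5 and
   CHECK_PROFITABILITY false, the returned expression is NITERS_PROLOG + 4
   and *BOUND is set to 9.  With CHECK_PROFITABILITY true, TH = 20 and a
   constant INT_NITERS_PROLOG = 3, the result is the constant 21 (the
   incremented threshold) and *BOUND = 21.  */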
1100 static tree
1101 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1102 int bound_prolog, int vf, int th, int *bound,
1103 bool check_profitability)
1105 tree type = TREE_TYPE (niters_prolog);
1106 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1107 build_int_cst (type, vf));
1109 *bound = vf + bound_prolog;
1110 if (check_profitability)
1112 th++;
1113 /* Peeling for constant times. */
1114 if (int_niters_prolog >= 0)
1116 *bound = (int_niters_prolog + vf < th
1117 ? th
1118 : vf + int_niters_prolog);
1119 return build_int_cst (type, *bound);
1121 /* Peeling for unknown times, in this case, prolog loop must
1122 execute less than bound_prolog times. */
1123 if (th >= vf + bound_prolog - 1)
1125 *bound = th;
1126 return build_int_cst (type, th);
1128 /* Need to do runtime comparison, but bound remains the same. */
1129 else if (th > vf)
1130 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1132 return niters;
1135 /* This function generates the following statements:
1137 niters = number of iterations loop executes (after peeling)
1138 niters_vector = niters / vf
1140 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1141 true if NITERS doesn't overflow. */
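/* For example (hypothetical values): with vf = 4 and niters = 10, the
   no-overflow form gives niters_vector = 10 >> 2 = 2 and the overflow-safe
   form gives ((10 - 4) >> 2) + 1 = 2 as well; the latter stays correct even
   if niters has wrapped around zero.  */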
1143 void
1144 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1145 tree *niters_vector_ptr, bool niters_no_overflow)
1147 tree ni_minus_gap, var;
1148 tree niters_vector;
1149 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1150 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1151 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1153 /* If epilogue loop is required because of data accesses with gaps, we
1154 subtract one iteration from the total number of iterations here for
1155 correct calculation of RATIO. */
1156 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1158 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1159 niters,
1160 build_one_cst (TREE_TYPE (niters)));
1161 if (!is_gimple_val (ni_minus_gap))
1163 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1164 gimple *stmts = NULL;
1165 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1166 true, var);
1167 gsi_insert_seq_on_edge_immediate (pe, stmts);
1170 else
1171 ni_minus_gap = niters;
1173 /* Create: niters >> log2(vf) */
1174 /* If it's known that niters == number of latch executions + 1 doesn't
1175 overflow, we can generate niters >> log2(vf); otherwise we generate
1176 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1177 will be at least one. */
1178 if (niters_no_overflow)
1179 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1180 ni_minus_gap, log_vf);
1181 else
1182 niters_vector
1183 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1184 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1185 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1186 ni_minus_gap,
1187 build_int_cst
1188 (TREE_TYPE (niters), vf)),
1189 log_vf),
1190 build_int_cst (TREE_TYPE (niters), 1));
1192 if (!is_gimple_val (niters_vector))
1194 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1195 gimple *stmts = NULL;
1196 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1197 gsi_insert_seq_on_edge_immediate (pe, stmts);
1199 *niters_vector_ptr = niters_vector;
1201 return;
1204 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1205 loop specified by LOOP_VINFO after vectorization, compute the number
1206 of iterations before vectorization (niters_vector * vf) and store it
1207 to NITERS_VECTOR_MULT_VF_PTR. */
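/* For example (hypothetical values): with vf = 4 and niters_vector = 24,
   niters_vector_mult_vf = 24 << 2 = 96, the number of scalar iterations
   covered by the vector loop.  */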
1209 static void
1210 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1211 tree niters_vector,
1212 tree *niters_vector_mult_vf_ptr)
1214 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 tree type = TREE_TYPE (niters_vector);
1217 tree log_vf = build_int_cst (type, exact_log2 (vf));
1218 basic_block exit_bb = single_exit (loop)->dest;
1220 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1221 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1222 niters_vector, log_vf);
1223 if (!is_gimple_val (niters_vector_mult_vf))
1225 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1226 gimple_seq stmts = NULL;
1227 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1228 &stmts, true, var);
1229 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1230 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1232 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1235 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1236 from SECOND/FIRST and puts it at the original loop's preheader/exit
1237 edge; the two loops are arranged as below:
1239 preheader_a:
1240 first_loop:
1241 header_a:
1242 i_1 = PHI<i_0, i_2>;
1244 i_2 = i_1 + 1;
1245 if (cond_a)
1246 goto latch_a;
1247 else
1248 goto between_bb;
1249 latch_a:
1250 goto header_a;
1252 between_bb:
1253 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1255 second_loop:
1256 header_b:
1257 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1258 or with i_2 if no LCSSA phi is created
1259 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1261 i_4 = i_3 + 1;
1262 if (cond_b)
1263 goto latch_b;
1264 else
1265 goto exit_bb;
1266 latch_b:
1267 goto header_b;
1269 exit_bb:
1271 This function creates loop closed SSA for the first loop; it updates the
1272 second loop's PHI nodes by replacing the arguments on its incoming edge
1273 with the results of the newly created lcssa PHI nodes. If
1274 CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
1275 created for non-iv phis of the first loop.
1277 This function assumes the exit bb of the first loop is the preheader bb of
1278 the second loop, i.e., between_bb in the example code. With the PHIs
1279 updated, the second loop will execute the remaining iterations of the first. */
1281 static void
1282 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1283 struct loop *first, struct loop *second,
1284 bool create_lcssa_for_iv_phis)
1286 gphi_iterator gsi_update, gsi_orig;
1287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1289 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1290 edge second_preheader_e = loop_preheader_edge (second);
1291 basic_block between_bb = single_exit (first)->dest;
1293 gcc_assert (between_bb == second_preheader_e->src);
1294 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1295 /* Either the first loop or the second is the loop to be vectorized. */
1296 gcc_assert (loop == first || loop == second);
1298 for (gsi_orig = gsi_start_phis (first->header),
1299 gsi_update = gsi_start_phis (second->header);
1300 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1301 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1303 gphi *orig_phi = gsi_orig.phi ();
1304 gphi *update_phi = gsi_update.phi ();
1306 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1307 /* Generate lcssa PHI node for the first loop. */
1308 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1309 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1311 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1312 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1313 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1314 arg = new_res;
1317 /* Update PHI node in the second loop by replacing arg on the loop's
1318 incoming edge. */
1319 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1323 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1324 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1325 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1326 appear like below:
1328 guard_bb:
1329 if (cond)
1330 goto merge_bb;
1331 else
1332 goto skip_loop;
1334 skip_loop:
1335 header_a:
1336 i_1 = PHI<i_0, i_2>;
1338 i_2 = i_1 + 1;
1339 if (cond_a)
1340 goto latch_a;
1341 else
1342 goto exit_a;
1343 latch_a:
1344 goto header_a;
1346 exit_a:
1347 i_5 = PHI<i_2>;
1349 merge_bb:
1350 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1352 update_loop:
1353 header_b:
1354 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1356 i_4 = i_3 + 1;
1357 if (cond_b)
1358 goto latch_b;
1359 else
1360 goto exit_bb;
1361 latch_b:
1362 goto header_b;
1364 exit_bb:
1366 This function creates PHI nodes at merge_bb and replaces the use of i_5
1367 in the update_loop's PHI node with the newly created PHI result. */
1369 static void
1370 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1371 struct loop *update_loop,
1372 edge guard_edge, edge merge_edge)
1374 source_location merge_loc, guard_loc;
1375 edge orig_e = loop_preheader_edge (skip_loop);
1376 edge update_e = loop_preheader_edge (update_loop);
1377 gphi_iterator gsi_orig, gsi_update;
1379 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1380 gsi_update = gsi_start_phis (update_loop->header));
1381 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1382 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1384 gphi *orig_phi = gsi_orig.phi ();
1385 gphi *update_phi = gsi_update.phi ();
1387 /* Generate new phi node at merge bb of the guard. */
1388 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1389 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1391 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1392 args in NEW_PHI for these edges. */
1393 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1394 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1395 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1396 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1397 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1398 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1400 /* Update phi in UPDATE_PHI. */
1401 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1405 /* LCSSA_PHI is an lcssa phi of the EPILOG loop, which is copied from LOOP;
1406 this function searches for the corresponding lcssa phi node in the exit
1407 bb of LOOP. If it is found, return the phi result; otherwise return
1408 NULL. */
1410 static tree
1411 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1412 gphi *lcssa_phi)
1414 gphi_iterator gsi;
1415 edge e = single_exit (loop);
1417 gcc_assert (single_pred_p (e->dest));
1418 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1420 gphi *phi = gsi.phi ();
1421 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1422 PHI_ARG_DEF (lcssa_phi, 0), 0))
1423 return PHI_RESULT (phi);
1425 return NULL_TREE;
1428 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1429 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1430 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1431 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1432 The CFG looks like:
1434 loop:
1435 header_a:
1436 i_1 = PHI<i_0, i_2>;
1438 i_2 = i_1 + 1;
1439 if (cond_a)
1440 goto latch_a;
1441 else
1442 goto exit_a;
1443 latch_a:
1444 goto header_a;
1446 exit_a:
1448 guard_bb:
1449 if (cond)
1450 goto merge_bb;
1451 else
1452 goto epilog_loop;
1454 ;; fall_through_bb
1456 epilog_loop:
1457 header_b:
1458 i_3 = PHI<i_2, i_4>;
1460 i_4 = i_3 + 1;
1461 if (cond_b)
1462 goto latch_b;
1463 else
1464 goto merge_bb;
1465 latch_b:
1466 goto header_b;
1468 merge_bb:
1469 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1471 exit_bb:
1472 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1474 For each name used outside EPILOG (i.e. for each name that has an lcssa
1475 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1476 args corresponding to GUARD_EDGE and MERGE_EDGE. The arg for MERGE_EDGE
1477 is the arg of the original PHI in exit_bb; the arg for GUARD_EDGE is
1478 defined by LOOP and is found in the exit bb of LOOP. The arg of the
1479 original PHI in exit_bb will also be updated. */
1481 static void
1482 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1483 edge guard_edge, edge merge_edge)
1485 gphi_iterator gsi;
1486 basic_block merge_bb = guard_edge->dest;
1488 gcc_assert (single_succ_p (merge_bb));
1489 edge e = single_succ_edge (merge_bb);
1490 basic_block exit_bb = e->dest;
1491 gcc_assert (single_pred_p (exit_bb));
1492 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1494 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1496 gphi *update_phi = gsi.phi ();
1497 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1498 /* This loop-closed-phi actually doesn't represent a use out of the
1499 loop - the phi arg is a constant. */
1500 if (TREE_CODE (old_arg) != SSA_NAME)
1501 continue;
1503 tree merge_arg = get_current_def (old_arg);
1504 if (!merge_arg)
1505 merge_arg = old_arg;
1507 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1508 /* If the var is live after loop but not a reduction, we simply
1509 use the old arg. */
1510 if (!guard_arg)
1511 guard_arg = old_arg;
1513 /* Create new phi node in MERGE_BB: */
1514 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1515 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1517 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1518 the two PHI args in merge_phi for these edges. */
1519 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1520 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1522 /* Update the original phi in exit_bb. */
1523 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1527 /* The EPILOG loop is duplicated from the original loop for vectorizing;
1528 the arg of its loop closed ssa PHI needs to be updated. */
1530 static void
1531 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1533 gphi_iterator gsi;
1534 basic_block exit_bb = single_exit (epilog)->dest;
1536 gcc_assert (single_pred_p (exit_bb));
1537 edge e = EDGE_PRED (exit_bb, 0);
1538 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1539 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1542 /* Function vect_do_peeling.
1544 Input:
1545 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1547 preheader:
1548 LOOP:
1549 header_bb:
1550 loop_body
1551 if (exit_loop_cond) goto exit_bb
1552 else goto header_bb
1553 exit_bb:
1555 - NITERS: The number of iterations of the loop.
1556 - NITERSM1: The number of iterations of the loop's latch.
1557 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1558 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1559 CHECK_PROFITABILITY is true.
1560 Output:
1561 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1563 This function peels prolog and epilog from the loop, adds guards skipping
1564 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1565 would look like:
1567 guard_bb_1:
1568 if (prefer_scalar_loop) goto merge_bb_1
1569 else goto guard_bb_2
1571 guard_bb_2:
1572 if (skip_prolog) goto merge_bb_2
1573 else goto prolog_preheader
1575 prolog_preheader:
1576 PROLOG:
1577 prolog_header_bb:
1578 prolog_body
1579 if (exit_prolog_cond) goto prolog_exit_bb
1580 else goto prolog_header_bb
1581 prolog_exit_bb:
1583 merge_bb_2:
1585 vector_preheader:
1586 VECTOR LOOP:
1587 vector_header_bb:
1588 vector_body
1589 if (exit_vector_cond) goto vector_exit_bb
1590 else goto vector_header_bb
1591 vector_exit_bb:
1593 guard_bb_3:
1594 if (skip_epilog) goto merge_bb_3
1595 else goto epilog_preheader
1597 merge_bb_1:
1599 epilog_preheader:
1600 EPILOG:
1601 epilog_header_bb:
1602 epilog_body
1603 if (exit_epilog_cond) goto merge_bb_3
1604 else goto epilog_header_bb
1606 merge_bb_3:
1608 Note this function peels the prolog and epilog and adds the guards
1609 only when they are necessary.
1611 TODO: Guard for prefer_scalar_loop should be emitted along with
1612 versioning conditions if loop versioning is needed. */
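/* For example (hypothetical values): with VF = 4, a known NITERS = 100 and
   3 prolog iterations peeled for alignment, the prolog runs 3 iterations,
   the vector loop executes (100 - 3) >> 2 = 24 iterations covering 96
   scalar iterations, and the epilog handles the single remaining
   iteration.  */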
1614 void
1615 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1616 tree *niters_vector, int th, bool check_profitability,
1617 bool niters_no_overflow)
1619 edge e, guard_e;
1620 tree type = TREE_TYPE (niters), guard_cond;
1621 basic_block guard_bb, guard_to;
1622 int prob_prolog, prob_vector, prob_epilog;
1623 int bound_prolog = 0, bound_epilog = 0, bound = 0;
1624 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1625 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1626 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1627 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1629 if (!prolog_peeling && !epilog_peeling)
1630 return;
1632 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1633 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1634 vf = 3;
1635 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1636 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1638 struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1639 struct loop *first_loop = loop;
1640 create_lcssa_for_virtual_phi (loop);
1641 update_ssa (TODO_update_ssa_only_virtuals);
1643 if (MAY_HAVE_DEBUG_STMTS)
1645 gcc_assert (!adjust_vec.exists ());
1646 adjust_vec.create (32);
1648 initialize_original_copy_tables ();
1650 /* Prolog loop may be skipped. */
1651 bool skip_prolog = (prolog_peeling != 0);
1652 /* Skip to epilog if scalar loop may be preferred. It's only used when
1653 we peel for epilog loop. */
1654 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1655 /* Epilog loop must be executed if the number of iterations for epilog
1656 loop is known at compile time, otherwise we need to add a check at
1657 the end of vector loop and skip to the end of epilog loop. */
1658 bool skip_epilog = (prolog_peeling < 0
1659 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1660 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1661 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1662 skip_epilog = false;
1664 /* Record the anchor bb at which guard should be placed if scalar loop
1665 may be preferred. */
1666 basic_block anchor = loop_preheader_edge (loop)->src;
1667 if (skip_vector)
1668 split_edge (loop_preheader_edge (loop));
1670 tree niters_prolog = build_int_cst (type, 0);
1671 source_location loop_loc = find_loop_location (loop);
1672 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1673 if (prolog_peeling)
1675 e = loop_preheader_edge (loop);
1676 if (!slpeel_can_duplicate_loop_p (loop, e))
1678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1679 "loop can't be duplicated to preheader edge.\n");
1680 gcc_unreachable ();
1682 /* Peel prolog and put it on preheader edge of loop. */
1683 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1684 if (!prolog)
1686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1687 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1688 gcc_unreachable ();
1690 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1691 first_loop = prolog;
1692 reset_original_copy_tables ();
1694 /* Generate and update the number of iterations for prolog loop. */
1695 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1696 &bound_prolog);
1697 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1699 /* Skip the prolog loop. */
1700 if (skip_prolog)
1702 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1703 niters_prolog, build_int_cst (type, 0));
1704 guard_bb = loop_preheader_edge (prolog)->src;
1705 guard_to = split_edge (loop_preheader_edge (loop));
1706 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1707 guard_to, guard_bb,
1708 inverse_probability (prob_prolog));
1709 e = EDGE_PRED (guard_to, 0);
1710 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1711 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1712 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1714 /* Update init address of DRs. */
1715 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1716 /* Update niters for vector loop. */
1717 LOOP_VINFO_NITERS (loop_vinfo)
1718 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1719 LOOP_VINFO_NITERSM1 (loop_vinfo)
1720 = fold_build2 (MINUS_EXPR, type,
1721 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1722 niters = vect_build_loop_niters (loop_vinfo);
1724 /* Prolog iterates at most bound_prolog - 1 times, latch iterates
1725 at most bound_prolog - 2 times. */
1726 record_niter_bound (prolog, bound_prolog - 2, false, true);
1727 delete_update_ssa ();
1728 adjust_vec_debug_stmts ();
1729 scev_reset ();
1732 if (epilog_peeling)
1734 e = single_exit (loop);
1735 if (!slpeel_can_duplicate_loop_p (loop, e))
1737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1738 "loop can't be duplicated to exit edge.\n");
1739 gcc_unreachable ();
1741 /* Peel epilog and put it on exit edge of loop. */
1742 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1743 if (!epilog)
1745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1746 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1747 gcc_unreachable ();
1749 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1751 /* The scalar version of the loop may be preferred. In this case, add
1752 a guard and skip to the epilog. Note this only happens when the number
1753 of iterations of the loop is unknown at compile time, otherwise this
1754 won't be vectorized. */
1755 if (skip_vector)
1757 /* The guard condition is based on NITERSM1 because NITERS might
1758 overflow, so what is generated here is niters_scalar - 1. In
1759 other words, both niters_scalar and bound_epilog refer to the
1760 scalar loop's latch. */
1761 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1762 bound_prolog, vf - 1, th - 1,
1763 &bound_epilog,
1764 check_profitability);
1765 guard_cond = fold_build2 (LT_EXPR, boolean_type_node,
1766 nitersm1, t);
1767 guard_bb = anchor;
1768 guard_to = split_edge (loop_preheader_edge (epilog));
1769 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1770 guard_to, guard_bb,
1771 inverse_probability (prob_vector));
1772 e = EDGE_PRED (guard_to, 0);
1773 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1774 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1775 scale_loop_profile (epilog, prob_vector, bound_epilog);
1778 tree niters_vector_mult_vf;
1779 /* If the loop is peeled a non-zero constant number of times, niters now
1780 refers to orig_niters - prolog_peeling, which won't overflow even if
1781 orig_niters overflows. */
1782 niters_no_overflow |= (prolog_peeling > 0);
1783 vect_gen_vector_loop_niters (loop_vinfo, niters,
1784 niters_vector, niters_no_overflow);
1785 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1786 &niters_vector_mult_vf);
1787 /* Update IVs of original loop as if they were advanced by
1788 niters_vector_mult_vf steps. */
1789 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1790 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1791 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1792 update_e);
1794 if (skip_epilog)
1796 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1797 niters, niters_vector_mult_vf);
1798 guard_bb = single_exit (loop)->dest;
1799 guard_to = split_edge (single_exit (epilog));
1800 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1801 skip_vector ? anchor : guard_bb,
1802 inverse_probability (prob_epilog));
1803 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1804 single_exit (epilog));
1805 scale_loop_profile (epilog, prob_epilog, bound);
1807 else
1808 slpeel_update_phi_nodes_for_lcssa (epilog);
1810 bound = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf * 2 : vf) - 2;
1811 /* The epilog loop is shared with the scalar version of the loop. */
1812 bound_epilog = MAX (bound, bound_epilog - 1);
1813 record_niter_bound (epilog, bound_epilog, false, true);
1815 delete_update_ssa ();
1816 adjust_vec_debug_stmts ();
1817 scev_reset ();
1819 adjust_vec.release ();
1820 free_original_copy_tables ();
1823 /* Function vect_create_cond_for_niters_checks.
1825 Create a conditional expression that represents the run-time checks for
1826 the loop's niter. The loop is guaranteed to terminate if the run-time
1827 checks hold.
1829 Input:
1830 COND_EXPR - input conditional expression. New conditions will be chained
1831 with a logical AND operation.
1832 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the
1833 assumptions on the number of iterations that must
1834 hold at run time.
1836 Output:
1837 COND_EXPR - conditional expression.
1839 The returned COND_EXPR is the conditional expression to be used in the
1840 if statement that controls which version of the loop gets executed at
1841 runtime. */
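/* Illustrative shape of the result (an assumed example, not emitted
   verbatim by this function): if the recorded niter assumption is
   "n > 0" and *COND_EXPR already holds the profitability test
   "n > th", the chained condition becomes

     n > th && n > 0

   and the vectorized version runs only when it evaluates to true.  */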
1843 static void
1844 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1846 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1848 if (*cond_expr)
1849 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1850 *cond_expr, part_cond_expr);
1851 else
1852 *cond_expr = part_cond_expr;
1855 /* Function vect_create_cond_for_align_checks.
1857 Create a conditional expression that represents the alignment checks for
1858 all of the data references (array element references) whose alignment must be
1859 checked at runtime.
1861 Input:
1862 COND_EXPR - input conditional expression. New conditions will be chained
1863 with logical AND operation.
1864 LOOP_VINFO - two fields of the loop information are used.
1865 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1866 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1868 Output:
1869 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1870 expression.
1871 The returned value is the conditional expression to be used in the if
1872 statement that controls which version of the loop gets executed at runtime.
1874 The algorithm makes two assumptions:
1875 1) The number of bytes "n" in a vector is a power of 2.
1876 2) An address "a" is aligned if a%n is zero, so this
1877 test can be done as a&(n-1) == 0. For example, for 16
1878 byte vectors the test is a&0xf == 0. */
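/* A sketch of the generated sequence (variable names follow the
   temporaries created below; the two addresses and the 16-byte mask are
   assumptions for illustration only):

     addr2int0 = (signed-ptrsize) &a[i];
     addr2int1 = (signed-ptrsize) &b[i];
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;
     cond      = (andmask == 0);

   i.e. the vector loop is entered only when every checked base address
   is 16-byte aligned.  */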
1880 static void
1881 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1882 tree *cond_expr,
1883 gimple_seq *cond_expr_stmt_list)
1885 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1886 vec<gimple *> may_misalign_stmts
1887 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1888 gimple *ref_stmt;
1889 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1890 tree mask_cst;
1891 unsigned int i;
1892 tree int_ptrsize_type;
1893 char tmp_name[20];
1894 tree or_tmp_name = NULL_TREE;
1895 tree and_tmp_name;
1896 gimple *and_stmt;
1897 tree ptrsize_zero;
1898 tree part_cond_expr;
1900 /* Check that mask is one less than a power of 2, i.e., mask is
1901 all zeros followed by all ones. */
1902 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1904 int_ptrsize_type = signed_type_for (ptr_type_node);
1906 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1907 of the first vector of the i'th data reference. */
1909 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1911 gimple_seq new_stmt_list = NULL;
1912 tree addr_base;
1913 tree addr_tmp_name;
1914 tree new_or_tmp_name;
1915 gimple *addr_stmt, *or_stmt;
1916 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1917 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1918 bool negative = tree_int_cst_compare
1919 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1920 tree offset = negative
1921 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1923 /* create: addr_tmp = (int)(address_of_first_vector) */
1924 addr_base =
1925 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1926 offset, loop);
1927 if (new_stmt_list != NULL)
1928 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1930 sprintf (tmp_name, "addr2int%d", i);
1931 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1932 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1933 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1935 /* The addresses are ORed together. */
1937 if (or_tmp_name != NULL_TREE)
1939 /* create: or_tmp = or_tmp | addr_tmp */
1940 sprintf (tmp_name, "orptrs%d", i);
1941 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1942 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1943 or_tmp_name, addr_tmp_name);
1944 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1945 or_tmp_name = new_or_tmp_name;
1947 else
1948 or_tmp_name = addr_tmp_name;
1950 } /* end for i */
1952 mask_cst = build_int_cst (int_ptrsize_type, mask);
1954 /* create: and_tmp = or_tmp & mask */
1955 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1957 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1958 or_tmp_name, mask_cst);
1959 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1961 /* Make and_tmp the left operand of the conditional test against zero.
1962 If and_tmp has a nonzero bit then some address is unaligned. */
1963 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1964 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1965 and_tmp_name, ptrsize_zero);
1966 if (*cond_expr)
1967 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1968 *cond_expr, part_cond_expr);
1969 else
1970 *cond_expr = part_cond_expr;
1973 /* Given two data references and segment lengths described by DR_A and DR_B,
1974 create an expression checking if the two address ranges intersect with
1975 each other based on the index of the two addresses. This can only be done
1976 if DR_A and DR_B refer to the same (array) object and the index is
1977 the only difference. For example:
1979 DR_A DR_B
1980 data-ref arr[i] arr[j]
1981 base_object arr arr
1982 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1984 The addresses and their indexes look like:
1986 |<- ADDR_A ->| |<- ADDR_B ->|
1987 ------------------------------------------------------->
1988 | | | | | | | | | |
1989 ------------------------------------------------------->
1990 i_0 ... i_0+4 j_0 ... j_0+4
1992 We can create expression based on index rather than address:
1994 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1996 Note the evolution step of the index needs to be considered in the comparison. */
1998 static bool
1999 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2000 const dr_with_seg_len& dr_a,
2001 const dr_with_seg_len& dr_b)
2003 if (integer_zerop (DR_STEP (dr_a.dr))
2004 || integer_zerop (DR_STEP (dr_b.dr))
2005 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2006 return false;
2008 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2009 return false;
2011 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2012 return false;
2014 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2015 return false;
2017 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2018 return false;
2020 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2022 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2023 unsigned HOST_WIDE_INT abs_step
2024 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2026 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2027 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2028 /* Infer the number of iterations with which the memory segment is accessed
2029 by DR. In other words, aliasing is checked if the memory segment accessed
2030 by DR_A in some iterations intersects with the memory segment accessed by
2031 DR_B in the same number of iterations.
2032 Note the segment length is a linear function of the number of iterations
2033 with DR_STEP as the coefficient. */
2034 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2035 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
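  /* Worked example with assumed values (not taken from any particular
     caller): for seg_len1 == 16 bytes and abs_step == 4 bytes,
     niter_len1 == (16 + 4 - 1) / 4 == 4, i.e. the segment spans four
     iterations of the index.  */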
2037 unsigned int i;
2038 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2039 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2041 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2042 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2043 /* The two indices must be the same if they are not SCEVs, or not SCEVs
2044 with respect to the current loop being vectorized. */
2045 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2046 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2047 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2048 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2050 if (operand_equal_p (access1, access2, 0))
2051 continue;
2053 return false;
2055 /* The two indices must have the same step. */
2056 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2057 return false;
2059 tree idx_step = CHREC_RIGHT (access1);
2060 /* Index must have const step, otherwise DR_STEP won't be constant. */
2061 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2062 /* Index must evaluate in the same direction as DR. */
2063 gcc_assert (!neg_step
2064 || tree_int_cst_compare (idx_step, size_zero_node) < 0);
2066 tree min1 = CHREC_LEFT (access1);
2067 tree min2 = CHREC_LEFT (access2);
2068 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2069 return false;
2071 /* Ideally, aliasing could be checked against the loop's control IV, but
2072 we would need to prove a linear mapping between the control IV and the
2073 reference index. Although that should be true, we instead check against
2074 the (array) index of the data reference. Like the segment length, the
2075 index length is a linear function of the number of iterations with
2076 idx_step as the coefficient, i.e., niter_len * idx_step. */
2077 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2078 build_int_cst (TREE_TYPE (min1),
2079 niter_len1));
2080 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2081 build_int_cst (TREE_TYPE (min2),
2082 niter_len2));
2083 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2084 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2085 /* Adjust ranges for negative step. */
2086 if (neg_step)
2088 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2089 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2090 CHREC_LEFT (access1), idx_step);
2091 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2092 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2093 CHREC_LEFT (access2), idx_step);
2095 tree part_cond_expr
2096 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2097 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2098 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2099 if (*cond_expr)
2100 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2101 *cond_expr, part_cond_expr);
2102 else
2103 *cond_expr = part_cond_expr;
2105 return true;
2108 /* Given two data references and segment lengths described by DR_A and DR_B,
2109 create an expression checking if the two address ranges intersect with
2110 each other:
2112 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2113 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
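/* A sketch with assumed operands: for DR_A based at &a[0] with segment
   length 16 and DR_B based at &b[0] with segment length 16 (both with a
   positive step), the emitted condition is morally

     (&a[0] + 16 <= &b[0]) || (&b[0] + 16 <= &a[0])

   i.e. the byte ranges [a, a+16) and [b, b+16) must not overlap.  */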
2115 static void
2116 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2117 const dr_with_seg_len& dr_a,
2118 const dr_with_seg_len& dr_b)
2120 *cond_expr = NULL_TREE;
2121 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2122 return;
2124 tree segment_length_a = dr_a.seg_len;
2125 tree segment_length_b = dr_b.seg_len;
2126 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2127 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2128 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2130 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2131 offset_a, DR_INIT (dr_a.dr));
2132 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2133 offset_b, DR_INIT (dr_b.dr));
2134 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2135 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2137 tree seg_a_min = addr_base_a;
2138 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2139 /* For a negative step, we need to adjust the address range by TYPE_SIZE_UNIT
2140 bytes, e.g., an int access running from a[3] down to a[1] covers the
2141 range [a+4, a+16) instead of [a, a+12). */
2142 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2144 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2145 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2146 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2149 tree seg_b_min = addr_base_b;
2150 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2151 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2153 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2154 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2155 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2157 *cond_expr
2158 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2159 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2160 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2163 /* Function vect_create_cond_for_alias_checks.
2165 Create a conditional expression that represents the run-time checks for
2166 overlapping of address ranges represented by a list of data reference
2167 relations passed as input.
2169 Input:
2170 COND_EXPR - input conditional expression. New conditions will be chained
2171 with a logical AND operation. If it is NULL, the first check
2172 is stored into it directly.
2173 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2174 to be checked.
2176 Output:
2177 COND_EXPR - conditional expression.
2179 The returned COND_EXPR is the conditional expression to be used in the if
2180 statement that controls which version of the loop gets executed at runtime. */
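/* Illustrative shape of the result for two DDR pairs (assumed, not
   emitted verbatim): the per-pair range checks produced by
   create_intersect_range_checks are ANDed together, e.g.

     no_overlap (dr_a0, dr_b0) && no_overlap (dr_a1, dr_b1)

   where "no_overlap" stands for the disjunction of range comparisons
   shown above; the vector loop runs only when every pair is
   independent.  */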
2183 void
2184 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2186 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2187 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2188 tree part_cond_expr;
2190 if (comp_alias_ddrs.is_empty ())
2191 return;
2193 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2195 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2196 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2198 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_NOTE, vect_location,
2201 "create runtime check for data references ");
2202 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2203 dump_printf (MSG_NOTE, " and ");
2204 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2205 dump_printf (MSG_NOTE, "\n");
2208 /* Create a condition expression for each pair of data references. */
2209 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2210 if (*cond_expr)
2211 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2212 *cond_expr, part_cond_expr);
2213 else
2214 *cond_expr = part_cond_expr;
2217 if (dump_enabled_p ())
2218 dump_printf_loc (MSG_NOTE, vect_location,
2219 "created %u versioning for alias checks.\n",
2220 comp_alias_ddrs.length ());
2224 /* Function vect_loop_versioning.
2226 If the loop has data references that may or may not be aligned and/or
2227 has data reference relations whose independence was not proven, then
2228 two versions of the loop need to be generated, one which is vectorized
2229 and one which isn't. A test is then generated to control which of the
2230 loops is executed. The test checks for the alignment of all of the
2231 data references that may or may not be aligned. An additional
2232 sequence of runtime tests is generated for each pair of DDRs whose
2233 independence was not proven. The vectorized version of the loop is
2234 executed only if both the alias and the alignment tests pass.
2236 The test generated to check which version of the loop is executed
2237 is modified to also check for profitability as indicated by the
2238 cost model threshold TH.
2240 The versioning precondition(s) are placed in *COND_EXPR and
2241 *COND_EXPR_STMT_LIST. */
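/* Rough shape of the versioned CFG (a sketch, not a literal dump):

     if (cond_expr)        <-- alignment/alias/niter/profitability checks
       <vectorizable copy of the loop>
     else
       <scalar (original) loop>

   with both versions feeding the same exit block, which is split below
   to keep the loop-closed SSA form simple.  */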
2243 void
2244 vect_loop_versioning (loop_vec_info loop_vinfo,
2245 unsigned int th, bool check_profitability)
2247 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2248 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2249 basic_block condition_bb;
2250 gphi_iterator gsi;
2251 gimple_stmt_iterator cond_exp_gsi;
2252 basic_block merge_bb;
2253 basic_block new_exit_bb;
2254 edge new_exit_e, e;
2255 gphi *orig_phi, *new_phi;
2256 tree cond_expr = NULL_TREE;
2257 gimple_seq cond_expr_stmt_list = NULL;
2258 tree arg;
2259 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2260 gimple_seq gimplify_stmt_list = NULL;
2261 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2262 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2263 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2264 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2266 if (check_profitability)
2267 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2268 build_int_cst (TREE_TYPE (scalar_loop_iters),
2269 th));
2271 if (version_niter)
2272 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2274 if (cond_expr)
2275 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2276 is_gimple_condexpr, NULL_TREE);
2278 if (version_align)
2279 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2280 &cond_expr_stmt_list);
2282 if (version_alias)
2283 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2285 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2286 is_gimple_condexpr, NULL_TREE);
2287 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2289 initialize_original_copy_tables ();
2290 if (scalar_loop)
2292 edge scalar_e;
2293 basic_block preheader, scalar_preheader;
2295 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2296 scale LOOP's frequencies instead. */
2297 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2298 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2299 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2300 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2301 while we need to move it above LOOP's preheader. */
2302 e = loop_preheader_edge (loop);
2303 scalar_e = loop_preheader_edge (scalar_loop);
2304 gcc_assert (empty_block_p (e->src)
2305 && single_pred_p (e->src));
2306 gcc_assert (empty_block_p (scalar_e->src)
2307 && single_pred_p (scalar_e->src));
2308 gcc_assert (single_pred_p (condition_bb));
2309 preheader = e->src;
2310 scalar_preheader = scalar_e->src;
2311 scalar_e = find_edge (condition_bb, scalar_preheader);
2312 e = single_pred_edge (preheader);
2313 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2314 scalar_preheader);
2315 redirect_edge_and_branch_force (scalar_e, preheader);
2316 redirect_edge_and_branch_force (e, condition_bb);
2317 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2318 single_pred (condition_bb));
2319 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2320 single_pred (scalar_preheader));
2321 set_immediate_dominator (CDI_DOMINATORS, preheader,
2322 condition_bb);
2324 else
2325 nloop = loop_version (loop, cond_expr, &condition_bb,
2326 prob, prob, REG_BR_PROB_BASE - prob, true);
2328 if (version_niter)
2330 /* The versioned loop could be infinite, so we need to clear the existing
2331 niter information, which was copied from the original loop. */
2332 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2333 vect_free_loop_info_assumptions (nloop);
2334 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2335 loop_constraint_set (loop, LOOP_C_INFINITE);
2338 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2339 && dump_enabled_p ())
2341 if (version_alias)
2342 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2343 "loop versioned for vectorization because of "
2344 "possible aliasing\n");
2345 if (version_align)
2346 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2347 "loop versioned for vectorization to enhance "
2348 "alignment\n");
2351 free_original_copy_tables ();
2353 /* Loop versioning violates an assumption we try to maintain during
2354 vectorization - that the loop exit block has a single predecessor.
2355 After versioning, the exit block of both loop versions is the same
2356 basic block (i.e. it has two predecessors). Just in order to simplify
2357 following transformations in the vectorizer, we fix this situation
2358 here by adding a new (empty) block on the exit-edge of the loop,
2359 with the proper loop-exit phis to maintain loop-closed-form.
2360 If loop versioning wasn't done from loop, but scalar_loop instead,
2361 merge_bb will already have just a single predecessor. */
2363 merge_bb = single_exit (loop)->dest;
2364 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2366 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2367 new_exit_bb = split_edge (single_exit (loop));
2368 new_exit_e = single_exit (loop);
2369 e = EDGE_SUCC (new_exit_bb, 0);
2371 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2373 tree new_res;
2374 orig_phi = gsi.phi ();
2375 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2376 new_phi = create_phi_node (new_res, new_exit_bb);
2377 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2378 add_phi_arg (new_phi, arg, new_exit_e,
2379 gimple_phi_arg_location_from_edge (orig_phi, e));
2380 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2384 /* End loop-exit-fixes after versioning. */
2386 if (cond_expr_stmt_list)
2388 cond_exp_gsi = gsi_last_bb (condition_bb);
2389 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2390 GSI_SAME_STMT);
2392 update_ssa (TODO_update_ssa);