/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src)
          && (!rename_from_outer_loop || e->src != outer_loop->header))
        continue;
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
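
/* Illustrative sketch (not part of the original comment): for NITERS == n
   the loop exit test is effectively rewritten as

     indx = 0;
     loop:
       ...
       indx = indx + 1;
       if (indx >= n) goto exit;   <-- replaces the original exit test
       goto loop;

   where the IV is created by create_iv below, and the comparison code is
   flipped to LT_EXPR when the loop is exited on the false edge.  */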
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  loop->nb_iterations = niters;
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For each pair of PHI arguments on edges FROM and TO (in FROM->dest and
   TO->dest respectively), ensure that the TO->dest argument has its
   current_def set to that of the corresponding FROM->dest argument.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */
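
/* In the vectorizer this is used in two ways (see vect_do_peeling below):
   passing E == single_exit (LOOP) places the copy after LOOP so it can
   serve as an epilog, while passing the preheader edge places the copy
   before LOOP as a prolog.  */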
struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       int probability)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which
   uses should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);

            break;
          }
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
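
/* For instance (illustrative): an IV with scalar evolution {init, +, step}
   and loop-invariant step passes the checks below, while a second-order
   chrec such as {0, +, {1, +, 1}} is rejected because its evolution part
   is itself a chrec.  */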
bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution function is not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution function is a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */
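
/* Illustrative example (not from the original comment): an integer IV with
   initial value 5 and step 3 is advanced to ni = 5 + NITERS * 3; a pointer
   IV gets the same offset applied with fold_build_pointer_plus instead of
   PLUS_EXPR.  */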
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
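
/* Worked example (illustrative, not from the original comment): for a V4SF
   access (VF == 4, elem_size == 4, vectype_align == 16) whose start address
   has addr_mis == 8 bytes, elem_misalign == 8 / 4 == 2 and
   prolog_niters == (4 - 2) & 3 == 2, i.e. two scalar iterations are peeled
   before the access becomes 16-byte aligned.  */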
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                &stmts, offset, loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
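
/* For example (illustrative): after peeling NITERS == 2 iterations of a
   data reference with DR_STEP == 4 bytes, DR_OFFSET grows by 2 * 4 == 8
   bytes, matching the update below.  */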
static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred over the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (included) of the number of iterations of the prolog loop.  VFM1 is
   the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is the
   threshold below which the scalar (rather than vectorized) loop will be
   executed.  This function stores the upper bound (included) of the result
   in BOUND_SCALAR.  */
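
/* For example (illustrative): without profitability checking, the scalar
   loop runs at most NITERS_PROLOG + VFM1 iterations, so with
   BOUND_PROLOG == 3 and VFM1 == 3 the stored *BOUND_SCALAR is 6.  */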
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
/* This function generates the following statements:

   niters = number of iterations loop executes (after peeling)
   niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */

void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                  niters,
                                  build_one_cst (TREE_TYPE (niters)));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf) */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
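  /* A quick check of the two forms (illustrative): with niters == 10 and
     vf == 4, 10 >> 2 == 2 and ((10 - 4) >> 2) + 1 == 2, so both yield the
     same two vector iterations whenever the computation of NITERS did not
     wrap around.  */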
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                 ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
                     fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                  fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                               ni_minus_gap,
                                               build_int_cst
                                                 (TREE_TYPE (niters), vf)),
                                  log_vf),
                     build_int_cst (TREE_TYPE (niters), 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (TREE_TYPE (niters), "bnd");
      gimple *stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *niters_vector_ptr = niters_vector;

  return;
}
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */
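
/* Since VF is a power of two the multiplication below is emitted as a left
   shift, e.g. (illustrative) niters_vector == 3 and vf == 4 give
   3 << 2 == 12 scalar iterations covered by the vector loop.  */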
static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>;  ;; Use of i_0 to be replaced with i_x,
                                  or with i_2 if no LCSSA phi is created
                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */
static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

       guard_bb:
         if (cond)
           goto merge_bb;
         else
           goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

       merge_bb:
         ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of the new PHI node.  */
static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL_TREE.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:

       guard_bb:
         if (cond)
           goto merge_bb;
         else
           goto epilog_loop;

       ;; fall_through_bb

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */
static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
         loop_body
         if (exit_loop_cond) goto exit_bb
         else                goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
                              CHECK_PROFITABILITY is true.
   Output:
   - NITERS_VECTOR: The number of iterations of loop after vectorization.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

       guard_bb_1:
         if (prefer_scalar_loop) goto merge_bb_1
         else                    goto guard_bb_2

       guard_bb_2:
         if (skip_prolog) goto merge_bb_2
         else             goto prolog_preheader

       prolog_preheader:
     PROLOG:
       prolog_header_bb:
         prolog_body
         if (exit_prolog_cond) goto prolog_exit_bb
         else                  goto prolog_header_bb
       prolog_exit_bb:

       merge_bb_2:

       vector_preheader:
     VECTOR LOOP:
       vector_header_bb:
         vector_body
         if (exit_vector_cond) goto vector_exit_bb
         else                  goto vector_header_bb
       vector_exit_bb:

       guard_bb_3:
         if (skip_epilog) goto merge_bb_3
         else             goto epilog_preheader

       merge_bb_1:

       epilog_preheader:
     EPILOG:
       epilog_header_bb:
         epilog_body
         if (exit_epilog_cond) goto merge_bb_3
         else                  goto epilog_header_bb

       merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
void
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
                 tree *niters_vector, int th, bool check_profitability,
                 bool niters_no_overflow)
{
  edge e, guard_e;
  tree type = TREE_TYPE (niters), guard_cond;
  basic_block guard_bb, guard_to;
  int prob_prolog, prob_vector, prob_epilog;
  int bound_prolog = 0, bound_scalar = 0, bound = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
  bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
                         || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));

  if (!prolog_peeling && !epilog_peeling)
    return;
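
  /* Note on the probabilities set up below (explanatory, not from the
     original sources): the vector loop is assumed to be entered 9 times
     out of 10, and a peeled prolog/epilog executing between 0 and vf - 1
     iterations is assumed to be entered with probability (vf - 1) / vf;
     vf == 2 is apparently treated as 3 so that this estimate stays above
     1/2.  */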
  prob_vector = 9 * REG_BR_PROB_BASE / 10;
  if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
    vf = 3;
  prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *first_loop = loop;
  create_lcssa_for_virtual_phi (loop);
  update_ssa (TODO_update_ssa_only_virtuals);

  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }
  initialize_original_copy_tables ();

  /* Prolog loop may be skipped.  */
  bool skip_prolog = (prolog_peeling != 0);
  /* Skip to epilog if scalar loop may be preferred.  It's only used when
     we peel for epilog loop.  */
  bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* Epilog loop must be executed if the number of iterations for epilog
     loop is known at compile time, otherwise we need to add a check at
     the end of vector loop and skip to the end of epilog loop.  */
  bool skip_epilog = (prolog_peeling < 0
                      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    skip_epilog = false;

  /* Record the anchor bb at which guard should be placed if scalar loop
     may be preferred.  */
  basic_block anchor = loop_preheader_edge (loop)->src;
  if (skip_vector)
    split_edge (loop_preheader_edge (loop));

  tree niters_prolog = build_int_cst (type, 0);
  source_location loop_loc = find_loop_location (loop);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  if (prolog_peeling)
    {
      e = loop_preheader_edge (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "loop can't be duplicated to preheader edge.\n");
          gcc_unreachable ();
        }
      /* Peel prolog and put it on preheader edge of loop.  */
      prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!prolog)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
          gcc_unreachable ();
        }
      slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
      first_loop = prolog;
      reset_original_copy_tables ();

      /* Generate and update the number of iterations for prolog loop.  */
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
                                                   &bound_prolog);
      slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);

      /* Skip the prolog loop.  */
      if (skip_prolog)
        {
          guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                    niters_prolog, build_int_cst (type, 0));
          guard_bb = loop_preheader_edge (prolog)->src;
          guard_to = split_edge (loop_preheader_edge (loop));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
                                           guard_to, guard_bb,
                                           inverse_probability (prob_prolog));
          e = EDGE_PRED (guard_to, 0);
          e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
          slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
          scale_loop_profile (prolog, prob_prolog, bound_prolog);
        }
      /* Update init address of DRs.  */
      vect_update_inits_of_drs (loop_vinfo, niters_prolog);
      /* Update niters for vector loop.  */
      LOOP_VINFO_NITERS (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
      LOOP_VINFO_NITERSM1 (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type,
                       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
      niters = vect_build_loop_niters (loop_vinfo);

      /* Prolog iterates at most bound_prolog times, latch iterates at
         most bound_prolog - 1 times.  */
      record_niter_bound (prolog, bound_prolog - 1, false, true);
      delete_update_ssa ();
      adjust_vec_debug_stmts ();
      scev_reset ();
    }
1736 if (epilog_peeling)
1738 e = single_exit (loop);
1739 if (!slpeel_can_duplicate_loop_p (loop, e))
1741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1742 "loop can't be duplicated to exit edge.\n");
1743 gcc_unreachable ();
1745 /* Peel epilog and put it on exit edge of loop. */
1746 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1747 if (!epilog)
1749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1750 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1751 gcc_unreachable ();
1753 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1755 /* The scalar version loop may be preferred. In this case, add a guard
1756 and skip to the epilog. Note this only happens when the number of
1757 iterations of the loop is unknown at compile time, otherwise the loop
1758 would not have been vectorized at all. */
1759 if (skip_vector)
1761 /* An additional epilogue iteration is peeled if a gap exists. */
1762 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1763 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1764 bound_prolog,
1765 peel_for_gaps ? vf : vf - 1,
1766 th, &bound_scalar,
1767 check_profitability);
1768 /* Build guard against NITERSM1 since NITERS may overflow. */
1769 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1770 guard_bb = anchor;
1771 guard_to = split_edge (loop_preheader_edge (epilog));
1772 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1773 guard_to, guard_bb,
1774 inverse_probability (prob_vector));
1775 e = EDGE_PRED (guard_to, 0);
1776 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1777 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1778 scale_loop_profile (epilog, prob_vector, bound_scalar);
1781 tree niters_vector_mult_vf;
1782 /* If the loop was peeled a non-zero constant number of times, niters now
1783 refers to orig_niters - prolog_peeling; it cannot overflow even if
1784 orig_niters overflows. */
1785 niters_no_overflow |= (prolog_peeling > 0);
1786 vect_gen_vector_loop_niters (loop_vinfo, niters,
1787 niters_vector, niters_no_overflow);
1788 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1789 &niters_vector_mult_vf);
1790 /* Update IVs of original loop as if they were advanced by
1791 niters_vector_mult_vf steps. */
1792 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1793 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1794 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1795 update_e);
1797 if (skip_epilog)
1799 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1800 niters, niters_vector_mult_vf);
1801 guard_bb = single_exit (loop)->dest;
1802 guard_to = split_edge (single_exit (epilog));
1803 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1804 skip_vector ? anchor : guard_bb,
1805 inverse_probability (prob_epilog));
1806 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1807 single_exit (epilog));
1808 scale_loop_profile (epilog, prob_epilog, bound);
1810 else
1811 slpeel_update_phi_nodes_for_lcssa (epilog);
1813 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1814 /* We share the epilog loop with the scalar version loop. */
1815 bound = MAX (bound, bound_scalar - 1);
1816 record_niter_bound (epilog, bound, false, true);
1818 delete_update_ssa ();
1819 adjust_vec_debug_stmts ();
1820 scev_reset ();
1822 adjust_vec.release ();
1823 free_original_copy_tables ();
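
/* A minimal standalone sketch (illustrative only; peeling_shape_sketch,
   scalar_body and vector_body are hypothetical names, not GCC code) of
   the scalar control-flow shape vect_do_peeling builds: a prolog loop,
   the vector loop and an epilog loop, with vf the vectorization factor.
   The boundary checks below correspond to the skip_prolog, skip_vector
   and skip_epilog guards above; probability scaling, SSA and profile
   updates are omitted, and the epilog doubles as the scalar fallback.  */

static void
peeling_shape_sketch (unsigned long niters, unsigned long niters_prolog,
                      unsigned long vf,
                      void (*scalar_body) (unsigned long),
                      void (*vector_body) (unsigned long))
{
  unsigned long i = 0;
  /* Prolog loop: peel a few scalar iterations, e.g. until the data
     references are aligned (skipped when niters_prolog is zero).  */
  for (; i < niters_prolog; i++)
    scalar_body (i);
  /* Vector loop: runs while a full vector of iterations remains
     (skipped entirely when too few iterations are left).  */
  for (; i + vf <= niters; i += vf)
    vector_body (i);
  /* Epilog loop: the remaining tail iterations (jumped over when the
     vector loop consumed everything).  */
  for (; i < niters; i++)
    scalar_body (i);
}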
1826 /* Function vect_create_cond_for_niters_checks.
1828 Create a conditional expression that represents the run-time checks for
1829 the loop's niter. The loop is guaranteed to terminate if the run-time
1830 checks hold.
1832 Input:
1833 COND_EXPR - input conditional expression. New conditions will be chained
1834 with logical AND operation. If it is NULL, the niter assumptions
1835 become the resulting condition on their own.
1836 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
1837 assumptions to be checked.
1839 Output:
1840 COND_EXPR - conditional expression.
1842 The returned COND_EXPR is the conditional expression to be used in the
1843 if statement that controls which version of the loop gets executed at
1844 runtime. */
1846 static void
1847 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1849 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1851 if (*cond_expr)
1852 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1853 *cond_expr, part_cond_expr);
1854 else
1855 *cond_expr = part_cond_expr;
1858 /* Function vect_create_cond_for_align_checks.
1860 Create a conditional expression that represents the alignment checks for
1861 all of the data references (array element references) whose alignment must be
1862 checked at runtime.
1864 Input:
1865 COND_EXPR - input conditional expression. New conditions will be chained
1866 with logical AND operation.
1867 LOOP_VINFO - two fields of the loop information are used.
1868 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1869 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1871 Output:
1872 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1873 expression.
1874 The returned value is the conditional expression to be used in the if
1875 statement that controls which version of the loop gets executed at runtime.
1877 The algorithm makes two assumptions:
1878 1) The number of bytes "n" in a vector is a power of 2.
1879 2) An address "a" is aligned if a%n is zero, and this
1880 test can be done as a&(n-1) == 0. For example, for 16
1881 byte vectors the test is a&0xf == 0. */
1883 static void
1884 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1885 tree *cond_expr,
1886 gimple_seq *cond_expr_stmt_list)
1888 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1889 vec<gimple *> may_misalign_stmts
1890 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1891 gimple *ref_stmt;
1892 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1893 tree mask_cst;
1894 unsigned int i;
1895 tree int_ptrsize_type;
1896 char tmp_name[20];
1897 tree or_tmp_name = NULL_TREE;
1898 tree and_tmp_name;
1899 gimple *and_stmt;
1900 tree ptrsize_zero;
1901 tree part_cond_expr;
1903 /* Check that mask is one less than a power of 2, i.e., mask is
1904 all zeros followed by all ones. */
1905 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1907 int_ptrsize_type = signed_type_for (ptr_type_node);
1909 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1910 of the first vector of the i'th data reference. */
1912 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1914 gimple_seq new_stmt_list = NULL;
1915 tree addr_base;
1916 tree addr_tmp_name;
1917 tree new_or_tmp_name;
1918 gimple *addr_stmt, *or_stmt;
1919 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1920 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1921 bool negative = tree_int_cst_compare
1922 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1923 tree offset = negative
1924 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1926 /* create: addr_tmp = (int)(address_of_first_vector) */
1927 addr_base =
1928 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1929 offset, loop);
1930 if (new_stmt_list != NULL)
1931 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1933 sprintf (tmp_name, "addr2int%d", i);
1934 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1935 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1936 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1938 /* The addresses are ORed together. */
1940 if (or_tmp_name != NULL_TREE)
1942 /* create: or_tmp = or_tmp | addr_tmp */
1943 sprintf (tmp_name, "orptrs%d", i);
1944 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1945 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1946 or_tmp_name, addr_tmp_name);
1947 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1948 or_tmp_name = new_or_tmp_name;
1950 else
1951 or_tmp_name = addr_tmp_name;
1953 } /* end for i */
1955 mask_cst = build_int_cst (int_ptrsize_type, mask);
1957 /* create: and_tmp = or_tmp & mask */
1958 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1960 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1961 or_tmp_name, mask_cst);
1962 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1964 /* Make and_tmp the left operand of the conditional test against zero.
1965 If and_tmp has a nonzero bit then some address is unaligned. */
1966 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1967 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1968 and_tmp_name, ptrsize_zero);
1969 if (*cond_expr)
1970 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1971 *cond_expr, part_cond_expr);
1972 else
1973 *cond_expr = part_cond_expr;
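
/* A standalone scalar sketch of the run-time test the statements built
   above perform (illustrative only; the helper name and its parameters
   are hypothetical, with addresses modelled as unsigned long).  All
   addresses are ORed together and then masked: the result is zero iff
   every address has its low bits clear, i.e. every address is aligned.  */

static int
all_addrs_aligned_sketch (const unsigned long *addrs, unsigned n_addrs,
                          unsigned long mask)
{
  unsigned long or_tmp = 0;
  for (unsigned i = 0; i < n_addrs; i++)
    or_tmp |= addrs[i];         /* the addr2int/orptrs chain  */
  return (or_tmp & mask) == 0;  /* the andmask test against zero  */
}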
1976 /* Given two data references and segment lengths described by DR_A and DR_B,
1977 create an expression checking whether the two address ranges intersect,
1978 based on the indices of the two accesses. This can only be done if
1979 DR_A and DR_B refer to the same (array) object and the index is the
1980 only difference. For example:
1982 DR_A DR_B
1983 data-ref arr[i] arr[j]
1984 base_object arr arr
1985 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1987 The addresses and their indices look like:
1989 |<- ADDR_A ->| |<- ADDR_B ->|
1990 ------------------------------------------------------->
1991 | | | | | | | | | |
1992 ------------------------------------------------------->
1993 i_0 ... i_0+4 j_0 ... j_0+4
1995 We can create an expression based on the index rather than the address:
1997 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1999 Note the evolution step of the index needs to be considered in the comparison. */
2001 static bool
2002 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2003 const dr_with_seg_len& dr_a,
2004 const dr_with_seg_len& dr_b)
2006 if (integer_zerop (DR_STEP (dr_a.dr))
2007 || integer_zerop (DR_STEP (dr_b.dr))
2008 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2009 return false;
2011 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2012 return false;
2014 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2015 return false;
2017 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2018 return false;
2020 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2021 return false;
2023 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2025 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2026 unsigned HOST_WIDE_INT abs_step
2027 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2029 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2030 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2031 /* Infer the number of iterations with which the memory segment is accessed
2032 by DR. In other words, aliasing is checked by testing whether the memory
2033 segment accessed by DR_A in some number of iterations intersects with the
2034 memory segment accessed by DR_B in the same number of iterations. Note
2035 the segment length is a linear function of the number of iterations with
2036 DR_STEP as the coefficient; e.g., a 16-byte segment with absolute step 4 covers (16 + 4 - 1) / 4 = 4 iterations. */
2037 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2038 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
2040 unsigned int i;
2041 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2042 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2044 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2045 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2046 /* The two indices must be the same if they are not SCEVs, or are not
2047 SCEVs with respect to the current loop being vectorized. */
2048 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2049 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2050 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2051 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2053 if (operand_equal_p (access1, access2, 0))
2054 continue;
2056 return false;
2058 /* The two indices must have the same step. */
2059 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2060 return false;
2062 tree idx_step = CHREC_RIGHT (access1);
2063 /* Index must have const step, otherwise DR_STEP won't be constant. */
2064 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2065 /* Index must evaluate in the same direction as DR. */
2066 gcc_assert (!neg_step
2067 || tree_int_cst_compare (idx_step, size_zero_node) < 0);
2069 tree min1 = CHREC_LEFT (access1);
2070 tree min2 = CHREC_LEFT (access2);
2071 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2072 return false;
2074 /* Ideally, aliasing could be checked against the loop's control IV, but
2075 we would need to prove a linear mapping between the control IV and the
2076 reference index. Although that should be true, we check against the
2077 (array) index of the data reference instead. Like the segment length,
2078 the index length is a linear function of the number of iterations with
2079 idx_step as the coefficient, i.e., niter_len * idx_step. */
2080 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2081 build_int_cst (TREE_TYPE (min1),
2082 niter_len1));
2083 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2084 build_int_cst (TREE_TYPE (min2),
2085 niter_len2));
2086 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2087 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2088 /* Adjust ranges for negative step. */
2089 if (neg_step)
2091 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2092 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2093 CHREC_LEFT (access1), idx_step);
2094 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2095 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2096 CHREC_LEFT (access2), idx_step);
2098 tree part_cond_expr
2099 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2100 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2101 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2102 if (*cond_expr)
2103 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2104 *cond_expr, part_cond_expr);
2105 else
2106 *cond_expr = part_cond_expr;
2108 return true;
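
/* An illustrative sketch (hypothetical helper, not GCC code) of the
   index-space disjointness test folded into PART_COND_EXPR above, for
   the positive-step case; the negative-step case first mirrors the
   ranges as done above.  min1/min2 are the CHREC_LEFT start indices,
   and each range spans niter_len * idx_step, so the accesses are
   independent iff one range ends at or before the other begins.  */

static int
index_ranges_disjoint_sketch (long min1, long min2, long idx_step,
                              long niter_len1, long niter_len2)
{
  long max1 = min1 + idx_step * niter_len1;
  long max2 = min2 + idx_step * niter_len2;
  return max1 <= min2 || max2 <= min1;  /* the two LE_EXPRs, ORed  */
}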
2111 /* Given two data references and segment lengths described by DR_A and DR_B,
2112 create an expression checking whether the two address ranges intersect
2113 with each other:
2115 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2116 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
2118 static void
2119 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2120 const dr_with_seg_len& dr_a,
2121 const dr_with_seg_len& dr_b)
2123 *cond_expr = NULL_TREE;
2124 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2125 return;
2127 tree segment_length_a = dr_a.seg_len;
2128 tree segment_length_b = dr_b.seg_len;
2129 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2130 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2131 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2133 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2134 offset_a, DR_INIT (dr_a.dr));
2135 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2136 offset_b, DR_INIT (dr_b.dr));
2137 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2138 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2140 tree seg_a_min = addr_base_a;
2141 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2142 /* For a negative step, we need to adjust the address range by
2143 TYPE_SIZE_UNIT bytes, e.g., for int accesses a[3] -> a[1] the range
2144 is [a+4, a+16) instead of [a, a+12). */
2145 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2147 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2148 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2149 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2152 tree seg_b_min = addr_base_b;
2153 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2154 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2156 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2157 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2158 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2160 *cond_expr
2161 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2162 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2163 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
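
/* An illustrative sketch of the address-based test built above
   (hypothetical helper; addresses modelled as unsigned long).  The
   segments are half-open byte ranges [min, max), so they do not
   intersect iff one ends at or before the other begins, which is
   exactly the TRUTH_OR_EXPR of the two LE_EXPRs.  */

static int
segments_disjoint_sketch (unsigned long a_min, unsigned long a_max,
                          unsigned long b_min, unsigned long b_max)
{
  return a_max <= b_min || b_max <= a_min;
}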
2166 /* Function vect_create_cond_for_alias_checks.
2168 Create a conditional expression that represents the run-time checks for
2169 overlapping of address ranges represented by a list of data reference
2170 relations passed as input.
2172 Input:
2173 COND_EXPR - input conditional expression. New conditions will be chained
2174 with logical AND operation. If it is NULL, the first alias check
2175 becomes the resulting condition on its own.
2176 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddr
2177 pairs to be checked.
2179 Output:
2180 COND_EXPR - conditional expression.
2182 The returned COND_EXPR is the conditional expression to be used in the if
2183 statement that controls which version of the loop gets executed at runtime.
2186 void
2187 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2189 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2190 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2191 tree part_cond_expr;
2193 if (comp_alias_ddrs.is_empty ())
2194 return;
2196 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2198 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2199 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2201 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_NOTE, vect_location,
2204 "create runtime check for data references ");
2205 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2206 dump_printf (MSG_NOTE, " and ");
2207 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2208 dump_printf (MSG_NOTE, "\n");
2211 /* Create a condition expression for each pair of data references. */
2212 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2213 if (*cond_expr)
2214 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2215 *cond_expr, part_cond_expr);
2216 else
2217 *cond_expr = part_cond_expr;
2220 if (dump_enabled_p ())
2221 dump_printf_loc (MSG_NOTE, vect_location,
2222 "created %u versioning for alias checks.\n",
2223 comp_alias_ddrs.length ());
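
/* A sketch of how the final run-time alias condition composes
   (illustrative only: the helper is hypothetical and, unlike the real
   code, tests every pair of segments rather than just the pairs
   recorded in LOOP_VINFO_COMP_ALIAS_DDRS).  The versioning condition
   is the logical AND over all tested pairs of the per-pair "segments
   do not intersect" checks.  */

static int
alias_checks_sketch (const unsigned long *seg_min,
                     const unsigned long *seg_max, unsigned n_segs)
{
  for (unsigned i = 0; i < n_segs; i++)
    for (unsigned j = i + 1; j < n_segs; j++)
      if (!(seg_max[i] <= seg_min[j] || seg_max[j] <= seg_min[i]))
        return 0;   /* A pair may overlap: run the scalar version.  */
  return 1;         /* All checks pass: the vector version is safe.  */
}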
2227 /* Function vect_loop_versioning.
2229 If the loop has data references that may or may not be aligned and/or
2230 has data reference relations whose independence was not proven, then
2231 two versions of the loop need to be generated, one which is vectorized
2232 and one which isn't. A test is then generated to control which of the
2233 loops is executed. The test checks for the alignment of all of the
2234 data references that may or may not be aligned. An additional
2235 sequence of runtime tests is generated for each pair of DDRs whose
2236 independence was not proven. The vectorized version of the loop is
2237 executed only if both the alias and alignment tests pass.
2239 The test generated to check which version of the loop is executed
2240 is modified to also check for profitability as indicated by the
2241 cost model threshold TH.
2243 The versioning precondition(s) are placed in *COND_EXPR and
2244 *COND_EXPR_STMT_LIST. */
2246 void
2247 vect_loop_versioning (loop_vec_info loop_vinfo,
2248 unsigned int th, bool check_profitability)
2250 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2251 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2252 basic_block condition_bb;
2253 gphi_iterator gsi;
2254 gimple_stmt_iterator cond_exp_gsi;
2255 basic_block merge_bb;
2256 basic_block new_exit_bb;
2257 edge new_exit_e, e;
2258 gphi *orig_phi, *new_phi;
2259 tree cond_expr = NULL_TREE;
2260 gimple_seq cond_expr_stmt_list = NULL;
2261 tree arg;
2262 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2263 gimple_seq gimplify_stmt_list = NULL;
2264 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2265 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2266 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2267 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2269 if (check_profitability)
2270 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2271 build_int_cst (TREE_TYPE (scalar_loop_iters),
2272 th));
2274 if (version_niter)
2275 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2277 if (cond_expr)
2278 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2279 is_gimple_condexpr, NULL_TREE);
2281 if (version_align)
2282 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2283 &cond_expr_stmt_list);
2285 if (version_alias)
2286 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2288 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2289 is_gimple_condexpr, NULL_TREE);
2290 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2292 initialize_original_copy_tables ();
2293 if (scalar_loop)
2295 edge scalar_e;
2296 basic_block preheader, scalar_preheader;
2298 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2299 scale LOOP's frequencies instead. */
2300 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2301 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2302 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2303 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2304 while we need to move it above LOOP's preheader. */
2305 e = loop_preheader_edge (loop);
2306 scalar_e = loop_preheader_edge (scalar_loop);
2307 gcc_assert (empty_block_p (e->src)
2308 && single_pred_p (e->src));
2309 gcc_assert (empty_block_p (scalar_e->src)
2310 && single_pred_p (scalar_e->src));
2311 gcc_assert (single_pred_p (condition_bb));
2312 preheader = e->src;
2313 scalar_preheader = scalar_e->src;
2314 scalar_e = find_edge (condition_bb, scalar_preheader);
2315 e = single_pred_edge (preheader);
2316 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2317 scalar_preheader);
2318 redirect_edge_and_branch_force (scalar_e, preheader);
2319 redirect_edge_and_branch_force (e, condition_bb);
2320 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2321 single_pred (condition_bb));
2322 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2323 single_pred (scalar_preheader));
2324 set_immediate_dominator (CDI_DOMINATORS, preheader,
2325 condition_bb);
2327 else
2328 nloop = loop_version (loop, cond_expr, &condition_bb,
2329 prob, prob, REG_BR_PROB_BASE - prob, true);
2331 if (version_niter)
2333 /* The versioned loop could be infinite; we need to clear the existing
2334 niter information, which was copied from the original loop. */
2335 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2336 vect_free_loop_info_assumptions (nloop);
2337 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2338 loop_constraint_set (loop, LOOP_C_INFINITE);
2341 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2342 && dump_enabled_p ())
2344 if (version_alias)
2345 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2346 "loop versioned for vectorization because of "
2347 "possible aliasing\n");
2348 if (version_align)
2349 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2350 "loop versioned for vectorization to enhance "
2351 "alignment\n");
2354 free_original_copy_tables ();
2356 /* Loop versioning violates an assumption we try to maintain during
2357 vectorization - that the loop exit block has a single predecessor.
2358 After versioning, the exit block of both loop versions is the same
2359 basic block (i.e. it has two predecessors). Just in order to simplify
2360 following transformations in the vectorizer, we fix this situation
2361 here by adding a new (empty) block on the exit-edge of the loop,
2362 with the proper loop-exit phis to maintain loop-closed-form.
2363 If loop versioning wasn't done from loop, but scalar_loop instead,
2364 merge_bb will already have just a single predecessor. */
2366 merge_bb = single_exit (loop)->dest;
2367 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2369 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2370 new_exit_bb = split_edge (single_exit (loop));
2371 new_exit_e = single_exit (loop);
2372 e = EDGE_SUCC (new_exit_bb, 0);
2374 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2376 tree new_res;
2377 orig_phi = gsi.phi ();
2378 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2379 new_phi = create_phi_node (new_res, new_exit_bb);
2380 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2381 add_phi_arg (new_phi, arg, new_exit_e,
2382 gimple_phi_arg_location_from_edge (orig_phi, e));
2383 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2387 /* End loop-exit-fixes after versioning. */
2389 if (cond_expr_stmt_list)
2391 cond_exp_gsi = gsi_last_bb (condition_bb);
2392 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2393 GSI_SAME_STMT);
2395 update_ssa (TODO_update_ssa);
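
/* An illustrative sketch of the code shape vect_loop_versioning
   produces (hypothetical predicates stand in for the profitability,
   niter-assumption, alignment and alias conditions chained above).  */

static void
versioning_shape_sketch (unsigned long niters, unsigned long th,
                         int align_ok, int no_alias,
                         void (*vector_loop) (void),
                         void (*scalar_loop) (void))
{
  /* The combined COND_EXPR guards the vectorized version; the branch
     is weighted with prob = 4 * REG_BR_PROB_BASE / 5 above.  */
  if (niters > th && align_ok && no_alias)
    vector_loop ();
  else
    scalar_loop ();
}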