/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
	rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src)
	  && (!rename_from_outer_loop || e->src != outer_loop->header))
	continue;
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    {
	      gimple_debug_bind_reset_value (stmt);
	      update_stmt (stmt);
	    }
	}
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
	adjust_vec.safe_push (ai);
      else
	adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
			gimple_bb (update_phi));
}
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
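/* For illustration only, after this transformation a loop with NITERS = n
   tests a fresh IV rather than its original exit condition, along these
   lines (assuming the exit edge is the "true" edge):

       indx = 0;
     loop:
       ...
       indx = indx + 1;
       if (indx >= n) goto exit;
       goto loop;  */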
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
	     &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
				 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
		     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
				     build_int_cst (TREE_TYPE (niters), 1));
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest coming in on those
   edges, ensure that the current_def of each TO->dest PHI argument is
   set from the corresponding FROM->dest PHI argument.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
	{
	  gsi_next (&gsi_from);
	  continue;
	}
      if (virtual_operand_p (to_arg))
	{
	  gsi_next (&gsi_to);
	  continue;
	}
      if (TREE_CODE (from_arg) != SSA_NAME)
	gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
	{
	  if (get_current_def (to_arg) == NULL_TREE)
	    set_current_def (to_arg, get_current_def (from_arg));
	}
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
		     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
					struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
					  exit_dest) == loop->header ?
		 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
	    &exit, 1, &new_exit, NULL,
	    at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
	 header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
						exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
							   0),
						EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
	{
	  gphi_iterator gsi;
	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);

	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
	      location_t orig_locus
		= gimple_phi_arg_location_from_edge (phi, e);

	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);
	    }
	}
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
			  single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
			       loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
	{
	  /* Remove the non-necessary forwarder of scalar_loop again.  */
	  redirect_edge_pred (single_succ_edge (preheader),
			      single_pred (preheader));
	  delete_basic_block (preheader);
	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
				   loop_preheader_edge (scalar_loop)->src);
	  preheader = split_edge (loop_preheader_edge (loop));
	  entry_e = single_pred_edge (preheader);
	}

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
			  single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
			       loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
	 edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
	   gsi_new = gsi_start_phis (new_loop->header);
	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
	{
	  gphi *orig_phi = gsi_orig.phi ();
	  gphi *new_phi = gsi_new.phi ();
	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
	  location_t orig_locus
	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
	}
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  */
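/* As a sketch (illustrative only), for COND = (niters_prolog == 0) the
   guard block becomes:

     guard_bb:
       if (niters_prolog == 0)
	 goto guard_to;   ;; skip edge, taken with PROBABILITY
       else
	 goto enter;      ;; falls through towards the loop

   with the edge counts scaled so the two probabilities sum to
   REG_BR_PROB_BASE.  */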
static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       basic_block guard_to, basic_block dom_bb,
		       int probability)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
				 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
	gphi *phi = gsi.phi ();
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	    break;
	if (gsi_end_p (gsi))
	  {
	    tree new_vop = copy_ssa_name (PHI_RESULT (phi));
	    gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	    imm_use_iterator imm_iter;
	    gimple *stmt;
	    use_operand_p use_p;

	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	    gimple_phi_set_result (new_phi, new_vop);
	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	      if (stmt != new_phi
		  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		  SET_USE (use_p, new_vop);
	  }
	break;
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
	return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip virtual phi's.  The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

	 Skip reduction phis.  */
      if (!iv_phi_p (phi))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi.  skip.\n");
	  continue;
	}

      /* Analyze the evolution function.  */

      evolution_part
	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION,
			 "No access function or evolution.\n");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution not invariant in loop.\n");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution is chrec.\n");
	  return false;
	}
    }

  return true;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */
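/* As a worked instance (illustrative numbers only): for an IV with
   init_expr = 0 and step_expr = 4, advancing past NITERS = 100 scalar
   iterations computes ni = 0 + 100 * 4 = 400, which becomes the value
   the epilog loop starts from.  */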
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
				  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi.  skip.\n");
	  continue;
	}

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a
	 polynomial of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
	gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute the address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis / elem_size) & (VF - 1)) / step

   (elem_size = element type size; an element is the scalar element whose
   type is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store the upper bound (inclusive) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the
   step (which is equal to the size of the interleaved group).

   The above formulas assume that VF == number of elements in the vector.
   This may not hold when there are multiple types in the loop.  In this
   case, for some data references in the loop the VF does not represent the
   number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
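/* A worked instance of the formula above (illustrative numbers only):
   for a 16-byte vector of 4-byte elements (VF = 4, elem_size = 4,
   step = 1) and a start address misaligned by addr_mis = 8 bytes:

     elem_misalign = 8 / 4 = 2
     prolog_niters = (4 - 2) & (4 - 1) = 2

   so two scalar iterations are peeled and DR then starts on a 16-byte
   boundary.  */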
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
			     basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
							      &stmts, offset,
							      loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
	int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
	fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
		     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign) & (nelements - 1)  */
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
	gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
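/* For instance (illustrative numbers only): if DR_STEP is 4 bytes and
   NITERS = 2 iterations were peeled, the code below yields
   DR_OFFSET + 2 * 4, accounting for the bytes consumed by the peeled
   iterations.  */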
static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
			fold_convert (sizetype, niters),
			fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
			fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
	{
	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred over the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (inclusive) of the number of iterations of the prolog loop.  VFM1
   is the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is
   the threshold below which the scalar (rather than vectorized) loop will
   be executed.  This function stores the upper bound (inclusive) of the
   result in BOUND_SCALAR.  */
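/* A rough illustration (made-up numbers): with VFM1 = 3 and a known
   prolog count INT_NITERS_PROLOG = 2, the scalar loop covers at most
   2 + 3 = 5 iterations; if profitability demands at least TH = 8
   iterations for the vector path, the bound is raised to TH - 1 = 7.  */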
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
			     int bound_prolog, int vfm1, int th,
			     int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
			     build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
	 compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
	{
	  *bound_scalar = (int_niters_prolog + vfm1 < th
			   ? th
			   : vfm1 + int_niters_prolog);
	  return build_int_cst (type, *bound_scalar);
	}
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
	 bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
	{
	  *bound_scalar = th;
	  return build_int_cst (type, th);
	}
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
	return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
/* This function generates the following statements:

   niters = number of iterations loop executes (after peeling)
   niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */
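/* For example (illustrative only): with vf = 4 and niters = 10, the
   no-overflow form computes niters_vector = 10 >> 2 = 2, and the
   overflow-safe form computes ((10 - 4) >> 2) + 1 = 2 as well, using
   the fact that the ratio is known to be at least one.  */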
void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
			     tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
				  niters,
				  build_one_cst (TREE_TYPE (niters)));
      if (!is_gimple_val (ni_minus_gap))
	{
	  var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
	  gimple *stmts = NULL;
	  ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
					       true, var);
	  gsi_insert_seq_on_edge_immediate (pe, stmts);
	}
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf).  */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
				 ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
		     fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
				  fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
					       ni_minus_gap,
					       build_int_cst
						 (TREE_TYPE (niters), vf)),
				  log_vf),
		     build_int_cst (TREE_TYPE (niters), 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (TREE_TYPE (niters), "bnd");
      gimple *stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *niters_vector_ptr = niters_vector;

  return;
}
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */

static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
				     tree niters_vector,
				     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
					    niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
						    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

     preheader_a:
   first_loop:
     header_a:
       i_1 = PHI<i_0, i_2>;
       ...
       i_2 = i_1 + 1;
       if (cond_a)
	 goto latch_a;
       else
	 goto between_bb;
     latch_a:
       goto header_a;

     between_bb:
       ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

   second_loop:
     header_b:
       i_3 = PHI<i_0, i_4>;  ;; Use of i_0 to be replaced with i_x,
				or with i_2 if no LCSSA phi is created
				under condition of CREATE_LCSSA_FOR_IV_PHIS.
       ...
       i_4 = i_3 + 1;
       if (cond_b)
	 goto latch_b;
       else
	 goto exit_bb;
     latch_b:
       goto header_b;

     exit_bb:

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of the newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */

static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
				   struct loop *first, struct loop *second,
				   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
	{
	  tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  gphi *lcssa_phi = create_phi_node (new_res, between_bb);
	  add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
	  arg = new_res;
	}

      /* Update PHI node in the second loop by replacing arg on the loop's
	 incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
	 goto merge_bb;
       else
	 goto skip_loop;

   skip_loop:
     header_a:
       i_1 = PHI<i_0, i_2>;
       ...
       i_2 = i_1 + 1;
       if (cond_a)
	 goto latch_a;
       else
	 goto exit_a;
     latch_a:
       goto header_a;

     exit_a:
       i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

   update_loop:
     header_b:
       i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
       ...
       i_4 = i_3 + 1;
       if (cond_b)
	 goto latch_b;
       else
	 goto exit_bb;
     latch_b:
       goto header_b;

     exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the new PHI result.  */

static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
				    struct loop *update_loop,
				    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
	gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
	 args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
		gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
			   PHI_ARG_DEF (lcssa_phi, 0), 0))
	return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

   loop:
     header_a:
       i_1 = PHI<i_0, i_2>;
       ...
       i_2 = i_1 + 1;
       if (cond_a)
	 goto latch_a;
       else
	 goto exit_a;
     latch_a:
       goto header_a;

     exit_a:

     guard_bb:
       if (cond)
	 goto merge_bb;
       else
	 goto epilog_loop;

     ;; fall_through_bb

   epilog_loop:
     header_b:
       i_3 = PHI<i_2, i_4>;
       ...
       i_4 = i_3 + 1;
       if (cond_b)
	 goto latch_b;
       else
	 goto merge_bb;
     latch_b:
       goto header_b;

     merge_bb:
       ;; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

     exit_bb:
       i_x = PHI<i_4>;  ;; Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e. - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */

static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
				    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
	 loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
	continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
	merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
	 use the old arg.  */
      if (!guard_arg)
	guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
	 the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
	 loop_body
	 if (exit_loop_cond) goto exit_bb
	 else goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
     CHECK_PROFITABILITY is true.

   Output:
   - NITERS_VECTOR: The number of iterations of loop after vectorization.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

       guard_bb_1:
	 if (prefer_scalar_loop) goto merge_bb_1
	 else goto guard_bb_2

       guard_bb_2:
	 if (skip_prolog) goto merge_bb_2
	 else goto prolog_preheader

       prolog_preheader:
     PROLOG:
       prolog_header_bb:
	 prolog_body
	 if (exit_prolog_cond) goto prolog_exit_bb
	 else goto prolog_header_bb
       prolog_exit_bb:

       merge_bb_2:

       vector_preheader:
     VECTOR LOOP:
       vector_header_bb:
	 vector_body
	 if (exit_vector_cond) goto vector_exit_bb
	 else goto vector_header_bb
       vector_exit_bb:

       guard_bb_3:
	 if (skip_epilog) goto merge_bb_3
	 else goto epilog_preheader

       merge_bb_1:

       epilog_preheader:
     EPILOG:
       epilog_header_bb:
	 epilog_body
	 if (exit_epilog_cond) goto merge_bb_3
	 else goto epilog_header_bb

       merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.
   Returns the created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
1623 struct loop *
1624 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1625 tree *niters_vector, int th, bool check_profitability,
1626 bool niters_no_overflow)
1628 edge e, guard_e;
1629 tree type = TREE_TYPE (niters), guard_cond;
1630 basic_block guard_bb, guard_to;
1631 int prob_prolog, prob_vector, prob_epilog;
1632 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1633 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1634 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1635 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1636 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1638 if (!prolog_peeling && !epilog_peeling)
1639 return NULL;
1641 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1642 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1643 vf = 3;
1644 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1645 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1647 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1648 struct loop *first_loop = loop;
1649 create_lcssa_for_virtual_phi (loop);
1650 update_ssa (TODO_update_ssa_only_virtuals);
1652 if (MAY_HAVE_DEBUG_STMTS)
1654 gcc_assert (!adjust_vec.exists ());
1655 adjust_vec.create (32);
1657 initialize_original_copy_tables ();
1659 /* Prolog loop may be skipped. */
1660 bool skip_prolog = (prolog_peeling != 0);
1661 /* Skip to epilog if scalar loop may be preferred. It's only used when
1662 we peel for epilog loop. */
1663 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1664 /* Epilog loop must be executed if the number of iterations for epilog
1665 loop is known at compile time, otherwise we need to add a check at
1666 the end of vector loop and skip to the end of epilog loop. */
1667 bool skip_epilog = (prolog_peeling < 0
1668 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1669 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1670 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1671 skip_epilog = false;
1673 /* Record the anchor bb at which guard should be placed if scalar loop
1674 may be preferred. */
1675 basic_block anchor = loop_preheader_edge (loop)->src;
1676 if (skip_vector)
1677 split_edge (loop_preheader_edge (loop));
1679 tree niters_prolog = build_int_cst (type, 0);
1680 source_location loop_loc = find_loop_location (loop);
1681 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1682 if (prolog_peeling)
1684 e = loop_preheader_edge (loop);
1685 if (!slpeel_can_duplicate_loop_p (loop, e))
1687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1688 "loop can't be duplicated to preheader edge.\n");
1689 gcc_unreachable ();
1691 /* Peel prolog and put it on preheader edge of loop. */
1692 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1693 if (!prolog)
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1696 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1697 gcc_unreachable ();
1699 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1700 first_loop = prolog;
1701 reset_original_copy_tables ();
1703 /* Generate and update the number of iterations for prolog loop. */
1704 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1705 &bound_prolog);
1706 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1708 /* Skip the prolog loop. */
1709 if (skip_prolog)
1711 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1712 niters_prolog, build_int_cst (type, 0));
1713 guard_bb = loop_preheader_edge (prolog)->src;
1714 guard_to = split_edge (loop_preheader_edge (loop));
1715 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1716 guard_to, guard_bb,
1717 inverse_probability (prob_prolog));
1718 e = EDGE_PRED (guard_to, 0);
1719 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1720 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1721 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1723 /* Update init address of DRs. */
1724 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1725 /* Update niters for vector loop. */
1726 LOOP_VINFO_NITERS (loop_vinfo)
1727 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1728 LOOP_VINFO_NITERSM1 (loop_vinfo)
1729 = fold_build2 (MINUS_EXPR, type,
1730 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1731 niters = vect_build_loop_niters (loop_vinfo);
1733 /* Prolog iterates at most bound_prolog times, latch iterates at
1734 most bound_prolog - 1 times. */
1735 record_niter_bound (prolog, bound_prolog - 1, false, true);
1736 delete_update_ssa ();
1737 adjust_vec_debug_stmts ();
1738 scev_reset ();
1741 if (epilog_peeling)
1743 e = single_exit (loop);
1744 if (!slpeel_can_duplicate_loop_p (loop, e))
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1747 "loop can't be duplicated to exit edge.\n");
1748 gcc_unreachable ();
1750 /* Peel epilog and put it on exit edge of loop. */
1751 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1752 if (!epilog)
1754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1755 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1756 gcc_unreachable ();
1758 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1760 /* Scalar version loop may be preferred. In this case, add guard
1761 and skip to epilog. Note this only happens when the number of
1762 iterations of loop is unknown at compile time, otherwise this
1763 won't be vectorized. */
1764 if (skip_vector)
1766 /* An additional epilogue iteration is peeled if a gap exists. */
1767 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1768 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1769 bound_prolog,
1770 peel_for_gaps ? vf : vf - 1,
1771 th, &bound_scalar,
1772 check_profitability);
1773 /* Build guard against NITERSM1 since NITERS may overflow. */
1774 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1775 guard_bb = anchor;
1776 guard_to = split_edge (loop_preheader_edge (epilog));
1777 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1778 guard_to, guard_bb,
1779 inverse_probability (prob_vector));
1780 e = EDGE_PRED (guard_to, 0);
1781 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1782 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1783 scale_loop_profile (epilog, prob_vector, bound_scalar);
1786 tree niters_vector_mult_vf;
1787 /* If the loop is peeled a non-zero constant number of times, NITERS now
1788 refers to orig_niters - prolog_peeling; it won't overflow even if
1789 orig_niters overflows. */
1790 niters_no_overflow |= (prolog_peeling > 0);
1791 vect_gen_vector_loop_niters (loop_vinfo, niters,
1792 niters_vector, niters_no_overflow);
1793 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1794 &niters_vector_mult_vf);
1795 /* Update IVs of original loop as if they were advanced by
1796 niters_vector_mult_vf steps. */
1797 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1798 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1799 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1800 update_e);
1802 if (skip_epilog)
1804 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1805 niters, niters_vector_mult_vf);
1806 guard_bb = single_exit (loop)->dest;
1807 guard_to = split_edge (single_exit (epilog));
1808 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1809 skip_vector ? anchor : guard_bb,
1810 inverse_probability (prob_epilog));
1811 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1812 single_exit (epilog));
1813 scale_loop_profile (epilog, prob_epilog, bound);
1815 else
1816 slpeel_update_phi_nodes_for_lcssa (epilog);
1818 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1819 /* We share epilog loop with scalar version loop. */
1820 bound = MAX (bound, bound_scalar - 1);
1821 record_niter_bound (epilog, bound, false, true);
1823 delete_update_ssa ();
1824 adjust_vec_debug_stmts ();
1825 scev_reset ();
1827 adjust_vec.release ();
1828 free_original_copy_tables ();
1830 return epilog;
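/* Illustrative sketch only, not part of the vectorizer: a source-level
   view of the prolog/vector/epilog structure built above, with the
   skip_prolog and skip_epilog guards made explicit (the skip_vector
   guard, which can bypass the vector loop entirely, is omitted for
   brevity).  All names are hypothetical; the sketch assumes
   niters_prolog <= n and vf >= 1, and `a[i] += 1' stands for one
   iteration of the original loop.  */
#if 0
static void
peeled_loop_shape (int *a, unsigned n, unsigned niters_prolog, unsigned vf)
{
  unsigned i = 0;
  /* Prolog loop; skipped entirely when no peeling is needed.  */
  if (niters_prolog != 0)
    for (; i < niters_prolog; i++)
      a[i] += 1;
  /* Vector loop; each step consumes VF elements.  */
  unsigned niters_vector_mult_vf = ((n - i) / vf) * vf;
  for (unsigned end = i + niters_vector_mult_vf; i < end; i += vf)
    for (unsigned k = 0; k < vf; k++)	/* one VF-wide vector step */
      a[i + k] += 1;
  /* Epilog loop; skipped when the vector loop consumed all iterations.  */
  if (i != n)
    for (; i < n; i++)
      a[i] += 1;
}
#endif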
1833 /* Function vect_create_cond_for_niters_checks.
1835 Create a conditional expression that represents the run-time checks for
1836 loop's niter. The loop is guaranteed to terminate if the run-time
1837 checks hold.
1839 Input:
1840 COND_EXPR - input conditional expression.  New conditions will be
1841 chained with logical AND operation.
1842 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the
1843 assumptions that must hold for the loop's niter
1844 information to be valid.
1846 Output:
1847 COND_EXPR - conditional expression.
1849 The returned COND_EXPR is the conditional expression to be used in the
1850 if statement that controls which version of the loop gets executed at
1851 runtime. */
1853 static void
1854 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1856 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1858 if (*cond_expr)
1859 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1860 *cond_expr, part_cond_expr);
1861 else
1862 *cond_expr = part_cond_expr;
1865 /* Function vect_create_cond_for_align_checks.
1867 Create a conditional expression that represents the alignment checks for
1868 all of the data references (array element references) whose alignment must be
1869 checked at runtime.
1871 Input:
1872 COND_EXPR - input conditional expression. New conditions will be chained
1873 with logical AND operation.
1874 LOOP_VINFO - two fields of the loop information are used.
1875 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1876 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1878 Output:
1879 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1880 expression.
1881 The returned value is the conditional expression to be used in the if
1882 statement that controls which version of the loop gets executed at runtime.
1884 The algorithm makes two assumptions:
1885 1) The number of bytes "n" in a vector is a power of 2.
1886 2) An address "a" is aligned if a%n is zero, and this
1887 test can be done as a&(n-1) == 0. For example, for 16
1888 byte vectors the test is a&0xf == 0. */
1890 static void
1891 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1892 tree *cond_expr,
1893 gimple_seq *cond_expr_stmt_list)
1895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1896 vec<gimple *> may_misalign_stmts
1897 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1898 gimple *ref_stmt;
1899 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1900 tree mask_cst;
1901 unsigned int i;
1902 tree int_ptrsize_type;
1903 char tmp_name[20];
1904 tree or_tmp_name = NULL_TREE;
1905 tree and_tmp_name;
1906 gimple *and_stmt;
1907 tree ptrsize_zero;
1908 tree part_cond_expr;
1910 /* Check that mask is one less than a power of 2, i.e., mask is
1911 all zeros followed by all ones. */
1912 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1914 int_ptrsize_type = signed_type_for (ptr_type_node);
1916 /* Create the expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1917 of the first vector of the i'th data reference. */
1919 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1921 gimple_seq new_stmt_list = NULL;
1922 tree addr_base;
1923 tree addr_tmp_name;
1924 tree new_or_tmp_name;
1925 gimple *addr_stmt, *or_stmt;
1926 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1927 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1928 bool negative = tree_int_cst_compare
1929 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1930 tree offset = negative
1931 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1933 /* create: addr_tmp = (int)(address_of_first_vector) */
1934 addr_base =
1935 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1936 offset, loop);
1937 if (new_stmt_list != NULL)
1938 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1940 sprintf (tmp_name, "addr2int%u", i);
1941 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1942 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1943 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1945 /* The addresses are ORed together. */
1947 if (or_tmp_name != NULL_TREE)
1949 /* create: or_tmp = or_tmp | addr_tmp */
1950 sprintf (tmp_name, "orptrs%u", i);
1951 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1952 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1953 or_tmp_name, addr_tmp_name);
1954 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1955 or_tmp_name = new_or_tmp_name;
1957 else
1958 or_tmp_name = addr_tmp_name;
1960 } /* end for i */
1962 mask_cst = build_int_cst (int_ptrsize_type, mask);
1964 /* create: and_tmp = or_tmp & mask */
1965 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1967 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1968 or_tmp_name, mask_cst);
1969 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1971 /* Make and_tmp the left operand of the conditional test against zero.
1972 If and_tmp has a nonzero bit then some address is unaligned. */
1973 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1974 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1975 and_tmp_name, ptrsize_zero);
1976 if (*cond_expr)
1977 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1978 *cond_expr, part_cond_expr);
1979 else
1980 *cond_expr = part_cond_expr;
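/* Illustrative sketch only (hypothetical helper, not GCC code): at run
   time the sequence built above amounts to ORing all the data-reference
   addresses together, ANDing with the vector-size-minus-one mask, and
   comparing against zero.  */
#if 0
static bool
all_refs_aligned (unsigned long mask, const unsigned long *addrs, unsigned n)
{
  unsigned long or_tmp = 0;
  for (unsigned i = 0; i < n; i++)
    or_tmp |= addrs[i];		/* or_tmp = dr_1 | ... | dr_n */
  /* A nonzero (or_tmp & mask) means some address is unaligned.  */
  return (or_tmp & mask) == 0;
}
#endif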
1983 /* Given two data references and segment lengths described by DR_A and DR_B,
1984 create an expression checking whether the two address ranges intersect,
1985 based on the indices of the two accesses. This can only be done if
1986 DR_A and DR_B refer to the same (array) object and the index is the
1987 only difference. For example:
1989 DR_A DR_B
1990 data-ref arr[i] arr[j]
1991 base_object arr arr
1992 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1994 The addresses and their index are like:
1996 |<- ADDR_A ->| |<- ADDR_B ->|
1997 ------------------------------------------------------->
1998 | | | | | | | | | |
1999 ------------------------------------------------------->
2000 i_0 ... i_0+4 j_0 ... j_0+4
2002 We can create an expression based on the index rather than the address:
2004 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2006 Note the evolution step of the index must be considered in the comparison. */
2008 static bool
2009 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2010 const dr_with_seg_len& dr_a,
2011 const dr_with_seg_len& dr_b)
2013 if (integer_zerop (DR_STEP (dr_a.dr))
2014 || integer_zerop (DR_STEP (dr_b.dr))
2015 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2016 return false;
2018 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2019 return false;
2021 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2022 return false;
2024 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2025 return false;
2027 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2028 return false;
2030 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2032 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2033 unsigned HOST_WIDE_INT abs_step
2034 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2036 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2037 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2038 /* Infer the number of iterations with which the memory segment is accessed
2039 by DR. In other words, aliasing is checked if the memory segment
2040 accessed by DR_A in some iterations intersects with the memory segment
2041 accessed by DR_B in the same number of iterations.
2042 Note the segment length is a linear function of the number of iterations
2043 with DR_STEP as the coefficient. */
2044 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2045 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
2047 unsigned int i;
2048 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2049 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2051 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2052 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2053 /* The two indices must be the same if they are not SCEVs, or are not
2054 SCEVs with respect to the current loop being vectorized. */
2055 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2056 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2057 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2058 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2060 if (operand_equal_p (access1, access2, 0))
2061 continue;
2063 return false;
2065 /* The two indices must have the same step. */
2066 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2067 return false;
2069 tree idx_step = CHREC_RIGHT (access1);
2070 /* Index must have const step, otherwise DR_STEP won't be constant. */
2071 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2072 /* Index must evaluate in the same direction as DR. */
2073 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2075 tree min1 = CHREC_LEFT (access1);
2076 tree min2 = CHREC_LEFT (access2);
2077 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2078 return false;
2080 /* Ideally, aliasing could be checked against the loop's control IV, but
2081 that requires proving a linear mapping between the control IV and the
2082 reference index. Although that should be true, we check against the
2083 (array) index of the data reference instead. Like the segment length,
2084 the index length is a linear function of the number of iterations with
2085 idx_step as the coefficient, i.e., niter_len * idx_step. */
2086 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2087 build_int_cst (TREE_TYPE (min1),
2088 niter_len1));
2089 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2090 build_int_cst (TREE_TYPE (min2),
2091 niter_len2));
2092 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2093 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2094 /* Adjust ranges for negative step. */
2095 if (neg_step)
2097 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2098 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2099 CHREC_LEFT (access1), idx_step);
2100 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2101 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2102 CHREC_LEFT (access2), idx_step);
2104 tree part_cond_expr
2105 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2106 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2107 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2108 if (*cond_expr)
2109 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2110 *cond_expr, part_cond_expr);
2111 else
2112 *cond_expr = part_cond_expr;
2114 return true;
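/* Illustrative sketch only (hypothetical names): for unit-step accesses
   arr[i] and arr[j], the condition built above reduces to checking that
   the index ranges [i0, i0 + niter_len) and [j0, j0 + niter_len) do not
   overlap.  */
#if 0
static bool
index_ranges_disjoint (long i0, long j0, long niter_len)
{
  /* Mirrors (max1 <= min2 || max2 <= min1) with max = min + len.  */
  return i0 + niter_len <= j0 || j0 + niter_len <= i0;
}
#endif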
2117 /* Given two data references and segment lengths described by DR_A and DR_B,
2118 create an expression checking whether the two address ranges intersect:
2121 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2122 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
2124 static void
2125 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2126 const dr_with_seg_len& dr_a,
2127 const dr_with_seg_len& dr_b)
2129 *cond_expr = NULL_TREE;
2130 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2131 return;
2133 tree segment_length_a = dr_a.seg_len;
2134 tree segment_length_b = dr_b.seg_len;
2135 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2136 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2137 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2139 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2140 offset_a, DR_INIT (dr_a.dr));
2141 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2142 offset_b, DR_INIT (dr_b.dr));
2143 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2144 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2146 tree seg_a_min = addr_base_a;
2147 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2148 /* For a negative step, we need to adjust the address range by
2149 TYPE_SIZE_UNIT bytes; e.g., for int accesses running from a[3] down to
2150 a[1], the range is [a+4, a+16) instead of [a, a+12). */
2151 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2153 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2154 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2155 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2158 tree seg_b_min = addr_base_b;
2159 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2160 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2162 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2163 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2164 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2166 *cond_expr
2167 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2168 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2169 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
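/* Illustrative sketch only (hypothetical helper): the expression built
   above is the classic interval-disjointness test on the two address
   segments [a_min, a_max) and [b_min, b_max).  */
#if 0
static bool
segments_disjoint (unsigned long a_min, unsigned long a_max,
		   unsigned long b_min, unsigned long b_max)
{
  return a_max <= b_min || b_max <= a_min;
}
#endif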
2172 /* Function vect_create_cond_for_alias_checks.
2174 Create a conditional expression that represents the run-time checks for
2175 overlapping of address ranges represented by a list of data reference
2176 relations passed as input.
2178 Input:
2179 COND_EXPR - input conditional expression. New conditions will be
2180 chained with logical AND operation; if it is NULL, the
2181 first condition is stored directly.
2182 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddr
2183 pairs to be checked.
2185 Output:
2186 COND_EXPR - conditional expression.
2188 The returned COND_EXPR is the conditional expression to be used in the if
2189 statement that controls which version of the loop gets executed at runtime.
2192 void
2193 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2195 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2196 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2197 tree part_cond_expr;
2199 if (comp_alias_ddrs.is_empty ())
2200 return;
2202 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2204 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2205 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2207 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_NOTE, vect_location,
2210 "create runtime check for data references ");
2211 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2212 dump_printf (MSG_NOTE, " and ");
2213 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2214 dump_printf (MSG_NOTE, "\n");
2217 /* Create a condition expression for each pair of data references. */
2218 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2219 if (*cond_expr)
2220 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2221 *cond_expr, part_cond_expr);
2222 else
2223 *cond_expr = part_cond_expr;
2226 if (dump_enabled_p ())
2227 dump_printf_loc (MSG_NOTE, vect_location,
2228 "created %u versioning for alias checks.\n",
2229 comp_alias_ddrs.length ());
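/* Illustrative sketch only (hypothetical types and names): the loop
   above chains the pairwise range checks with logical AND, so the
   runtime condition for taking the vectorized loop behaves like
   this.  */
#if 0
struct seg_pair { unsigned long a_min, a_max, b_min, b_max; };

static bool
all_pairs_disjoint (const struct seg_pair *pairs, unsigned n)
{
  bool cond = true;
  for (unsigned i = 0; i < n; i++)
    cond = cond && (pairs[i].a_max <= pairs[i].b_min
		    || pairs[i].b_max <= pairs[i].a_min);
  return cond;
}
#endif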
2233 /* Function vect_loop_versioning.
2235 If the loop has data references that may or may not be aligned and/or
2236 has data reference relations whose independence was not proven, then
2237 two versions of the loop need to be generated, one which is vectorized
2238 and one which isn't. A test is then generated to control which of the
2239 loops is executed. The test checks for the alignment of all of the
2240 data references that may or may not be aligned. An additional
2241 sequence of runtime tests is generated for each pair of DDRs whose
2242 independence was not proven. The vectorized version of the loop is
2243 executed only if both the alias and alignment tests pass.
2245 The test generated to check which version of the loop is executed
2246 is modified to also check for profitability as indicated by the
2247 cost model threshold TH.
2249 The versioning precondition(s) are placed in *COND_EXPR and
2250 *COND_EXPR_STMT_LIST. */
2252 void
2253 vect_loop_versioning (loop_vec_info loop_vinfo,
2254 unsigned int th, bool check_profitability)
2256 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2257 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2258 basic_block condition_bb;
2259 gphi_iterator gsi;
2260 gimple_stmt_iterator cond_exp_gsi;
2261 basic_block merge_bb;
2262 basic_block new_exit_bb;
2263 edge new_exit_e, e;
2264 gphi *orig_phi, *new_phi;
2265 tree cond_expr = NULL_TREE;
2266 gimple_seq cond_expr_stmt_list = NULL;
2267 tree arg;
2268 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2269 gimple_seq gimplify_stmt_list = NULL;
2270 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2271 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2272 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2273 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2275 if (check_profitability)
2276 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2277 build_int_cst (TREE_TYPE (scalar_loop_iters),
2278 th));
2280 if (version_niter)
2281 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2283 if (cond_expr)
2284 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2285 is_gimple_condexpr, NULL_TREE);
2287 if (version_align)
2288 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2289 &cond_expr_stmt_list);
2291 if (version_alias)
2292 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2294 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2295 is_gimple_condexpr, NULL_TREE);
2296 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2298 initialize_original_copy_tables ();
2299 if (scalar_loop)
2301 edge scalar_e;
2302 basic_block preheader, scalar_preheader;
2304 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2305 scale LOOP's frequencies instead. */
2306 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2307 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2308 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2309 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2310 while we need to move it above LOOP's preheader. */
2311 e = loop_preheader_edge (loop);
2312 scalar_e = loop_preheader_edge (scalar_loop);
2313 gcc_assert (empty_block_p (e->src)
2314 && single_pred_p (e->src));
2315 gcc_assert (empty_block_p (scalar_e->src)
2316 && single_pred_p (scalar_e->src));
2317 gcc_assert (single_pred_p (condition_bb));
2318 preheader = e->src;
2319 scalar_preheader = scalar_e->src;
2320 scalar_e = find_edge (condition_bb, scalar_preheader);
2321 e = single_pred_edge (preheader);
2322 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2323 scalar_preheader);
2324 redirect_edge_and_branch_force (scalar_e, preheader);
2325 redirect_edge_and_branch_force (e, condition_bb);
2326 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2327 single_pred (condition_bb));
2328 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2329 single_pred (scalar_preheader));
2330 set_immediate_dominator (CDI_DOMINATORS, preheader,
2331 condition_bb);
2333 else
2334 nloop = loop_version (loop, cond_expr, &condition_bb,
2335 prob, prob, REG_BR_PROB_BASE - prob, true);
2337 if (version_niter)
2339 /* The versioned loop could be infinite; we need to clear the existing
2340 niter information, which was copied from the original loop. */
2341 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2342 vect_free_loop_info_assumptions (nloop);
2343 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2344 loop_constraint_set (loop, LOOP_C_INFINITE);
2347 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2348 && dump_enabled_p ())
2350 if (version_alias)
2351 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2352 "loop versioned for vectorization because of "
2353 "possible aliasing\n");
2354 if (version_align)
2355 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2356 "loop versioned for vectorization to enhance "
2357 "alignment\n");
2360 free_original_copy_tables ();
2362 /* Loop versioning violates an assumption we try to maintain during
2363 vectorization - that the loop exit block has a single predecessor.
2364 After versioning, the exit block of both loop versions is the same
2365 basic block (i.e. it has two predecessors). Just in order to simplify
2366 following transformations in the vectorizer, we fix this situation
2367 here by adding a new (empty) block on the exit-edge of the loop,
2368 with the proper loop-exit phis to maintain loop-closed-form.
2369 If loop versioning wasn't done from LOOP but from SCALAR_LOOP instead,
2370 merge_bb will already have just a single predecessor. */
2372 merge_bb = single_exit (loop)->dest;
2373 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2375 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2376 new_exit_bb = split_edge (single_exit (loop));
2377 new_exit_e = single_exit (loop);
2378 e = EDGE_SUCC (new_exit_bb, 0);
2380 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2382 tree new_res;
2383 orig_phi = gsi.phi ();
2384 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2385 new_phi = create_phi_node (new_res, new_exit_bb);
2386 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2387 add_phi_arg (new_phi, arg, new_exit_e,
2388 gimple_phi_arg_location_from_edge (orig_phi, e));
2389 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2393 /* End loop-exit-fixes after versioning. */
2395 if (cond_expr_stmt_list)
2397 cond_exp_gsi = gsi_last_bb (condition_bb);
2398 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2399 GSI_SAME_STMT);
2401 update_ssa (TODO_update_ssa);
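/* Illustrative sketch only (hypothetical names, not GCC code): after
   versioning, control flow at the source level looks like the
   following, where COND combines the profitability, niter, alignment
   and alias checks gathered above.  */
#if 0
static void
versioned_loop_shape (int *a, const int *b, long n, bool cond)
{
  if (cond)
    /* Version that will be vectorized.  */
    for (long i = 0; i < n; i++)
      a[i] += b[i];
  else
    /* Scalar fallback version.  */
    for (long i = 0; i < n; i++)
      a[i] += b[i];
}
#endif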