gcc/tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src))
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
111 if (outer_loop->inner->next)
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use, using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
120 else
121 continue;
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 struct adjust_info
133 tree from, to;
134 basic_block bb;
137 /* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
143 static vec<adjust_info, va_heap> adjust_vec;
145 /* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
156 gimple *stmt;
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
165 use_operand_p use_p;
166 basic_block bbuse;
168 if (!is_gimple_debug (stmt))
169 continue;
171 gcc_assert (gimple_debug_bind_p (stmt));
173 bbuse = gimple_bb (stmt);
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
192 /* Adjust debug stmts as scheduled before. */
194 static void
195 adjust_vec_debug_stmts (void)
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
200 gcc_assert (adjust_vec.exists ());
202 while (!adjust_vec.is_empty ())
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
209 /* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
217 adjust_info ai;
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
222 && ! virtual_operand_p (from))
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
230 else
231 adjust_debug_stmts_now (&ai);
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one and its limit is NITERS.
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
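/* Editorial illustration (not part of the original sources): for NITERS = n
   and an exit taken on the true edge, the rewritten loop tests a fresh
   counter instead of the original condition, roughly:

       i_before = PHI <0 (preheader), i_after (latch)>
       ...                          ;; loop body
       i_after = i_before + 1
       if (i_after >= n)            ;; "i_after < n" when the exit is the
         goto exit;                 ;; false edge
       else
         goto latch;                                                        */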
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
260 tree indx_before_incr, indx_after_incr;
261 gcond *cond_stmt;
262 gcond *orig_cond;
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
269 source_location loop_loc;
270 enum tree_code code;
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
294 free_stmt_vec_info (orig_cond);
296 loop_loc = find_loop_location (loop);
297 if (dump_enabled_p ())
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For each pair of PHI arguments in FROM->dest and TO->dest taken from
312 those edges, ensure that the current_def of the TO->dest argument is
313 set to that of the corresponding FROM->dest argument. */
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
318 gimple_stmt_iterator gsi_from, gsi_to;
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
330 gsi_next (&gsi_from);
331 continue;
333 if (virtual_operand_p (to_arg))
335 gsi_next (&gsi_to);
336 continue;
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
357 /* Given LOOP this function generates a new copy of it and puts it
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
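/* Editorial note (not from the original sources): when E is LOOP's exit edge
   the copy is placed after LOOP and acts as an epilogue; when E is the
   preheader edge the copy is placed before LOOP and acts as a prologue.  In
   both cases the copied loop gets its own single-entry preheader, which is
   what keeps the PHI-argument bookkeeping below simple.  */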
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
367 struct loop *new_loop;
368 basic_block *new_bbs, *bbs, *pbbs;
369 bool at_exit;
370 bool was_imm_dom;
371 basic_block exit_dest;
372 edge exit, new_exit;
373 bool duplicate_outer_loop = false;
375 exit = single_exit (loop);
376 at_exit = (e == exit);
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
389 /* Check whether duplication is possible. */
390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
392 free (bbs);
393 return NULL;
396 /* Generate new loop structure. */
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
400 exit_dest = exit->dest;
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
403 true : false);
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409 edge entry_e = single_pred_edge (preheader);
410 bbs[0] = preheader;
411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 &exit, 1, &new_exit, NULL,
416 at_exit ? loop->latch : e->src, true);
417 exit = single_exit (loop);
418 basic_block new_preheader = new_bbs[0];
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
422 if (scalar_loop != loop)
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
433 EDGE_SUCC (loop->latch, 0));
436 if (at_exit) /* Add the loop copy at exit. */
438 if (scalar_loop != loop)
440 gphi_iterator gsi;
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
446 gphi *phi = gsi.phi ();
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457 if (was_imm_dom || duplicate_outer_loop)
458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
464 delete_basic_block (preheader);
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
468 else /* Add the copy at entry. */
470 if (scalar_loop != loop)
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
502 if (scalar_loop != loop)
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
506 gphi_iterator gsi_orig, gsi_new;
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
525 free (new_bbs);
526 free (bbs);
528 checking_verify_dominators (CDI_DOMINATORS);
530 return new_loop;
534 /* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
537 skip the loop. PROBABILITY is the skip edge's probability. */
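/* Editorial sketch (illustrative only): after this call GUARD_BB ends with

       if (COND)    ;; new edge, EDGE_TRUE_VALUE, probability PROBABILITY
         goto GUARD_TO;
       else         ;; old fall-through edge, now EDGE_FALSE_VALUE
         <continue into the loop as before>;                                */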
539 static edge
540 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
541 basic_block guard_to, basic_block dom_bb,
542 int probability)
544 gimple_stmt_iterator gsi;
545 edge new_e, enter_e;
546 gcond *cond_stmt;
547 gimple_seq gimplify_stmt_list = NULL;
549 enter_e = EDGE_SUCC (guard_bb, 0);
550 enter_e->flags &= ~EDGE_FALLTHRU;
551 enter_e->flags |= EDGE_FALSE_VALUE;
552 gsi = gsi_last_bb (guard_bb);
554 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
555 NULL_TREE);
556 if (gimplify_stmt_list)
557 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
559 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
560 gsi = gsi_last_bb (guard_bb);
561 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
563 /* Add new edge to connect guard block to the merge/loop-exit block. */
564 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
566 new_e->count = guard_bb->count;
567 new_e->probability = probability;
568 new_e->count = apply_probability (enter_e->count, probability);
569 enter_e->count -= new_e->count;
570 enter_e->probability = inverse_probability (probability);
571 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
572 return new_e;
576 /* This function verifies that the following restrictions apply to LOOP:
577 (1) it consists of exactly 2 basic blocks (header and an empty latch)
578 for an innermost loop, or of 5 basic blocks for an outer loop;
579 (2) it is single entry, single exit;
580 (3) its exit condition is the last stmt in the header;
581 (4) E is the entry/exit edge of LOOP. */
584 bool
585 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
587 edge exit_e = single_exit (loop);
588 edge entry_e = loop_preheader_edge (loop);
589 gcond *orig_cond = get_loop_exit_condition (loop);
590 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
591 unsigned int num_bb = loop->inner? 5 : 2;
593 /* All loops have an outer scope; the only case loop->outer is NULL is for
594 the function itself. */
595 if (!loop_outer (loop)
596 || loop->num_nodes != num_bb
597 || !empty_block_p (loop->latch)
598 || !single_exit (loop)
599 /* Verify that new loop exit condition can be trivially modified. */
600 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
601 || (e != exit_e && e != entry_e))
602 return false;
604 return true;
607 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
608 in the exit bb and rename all the uses after the loop. This simplifies
609 the *guard[12] routines, which assume loop closed SSA form for all PHIs
610 (but normally loop closed SSA form doesn't require virtual PHIs to be
611 in the same form). Doing this early simplifies the checking what
612 uses should be renamed. */
614 static void
615 create_lcssa_for_virtual_phi (struct loop *loop)
617 gphi_iterator gsi;
618 edge exit_e = single_exit (loop);
620 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
621 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
623 gphi *phi = gsi.phi ();
624 for (gsi = gsi_start_phis (exit_e->dest);
625 !gsi_end_p (gsi); gsi_next (&gsi))
626 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
627 break;
628 if (gsi_end_p (gsi))
630 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
631 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
632 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
633 imm_use_iterator imm_iter;
634 gimple *stmt;
635 use_operand_p use_p;
637 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
638 gimple_phi_set_result (new_phi, new_vop);
639 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
640 if (stmt != new_phi
641 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
642 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
643 SET_USE (use_p, new_vop);
645 break;
650 /* Function find_loop_location.
652 Extract the location of the loop in the source code.
653 If the loop is not well formed for vectorization, an estimated
654 location is calculated.
655 Return the loop location if found, and UNKNOWN_LOCATION otherwise. */
657 source_location
658 find_loop_location (struct loop *loop)
660 gimple *stmt = NULL;
661 basic_block bb;
662 gimple_stmt_iterator si;
664 if (!loop)
665 return UNKNOWN_LOCATION;
667 stmt = get_loop_exit_condition (loop);
669 if (stmt
670 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
671 return gimple_location (stmt);
673 /* If we got here the loop is probably not "well formed",
674 try to estimate the loop location */
676 if (!loop->header)
677 return UNKNOWN_LOCATION;
679 bb = loop->header;
681 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
683 stmt = gsi_stmt (si);
684 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
685 return gimple_location (stmt);
688 return UNKNOWN_LOCATION;
691 /* Return true if PHI defines an IV of the loop to be vectorized. */
693 static bool
694 iv_phi_p (gphi *phi)
696 if (virtual_operand_p (PHI_RESULT (phi)))
697 return false;
699 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
700 gcc_assert (stmt_info != NULL);
701 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
702 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
703 return false;
705 return true;
708 /* Function vect_can_advance_ivs_p
710 In case the number of iterations that LOOP iterates is unknown at compile
711 time, an epilog loop will be generated, and the loop induction variables
712 (IVs) will be "advanced" to the value they are supposed to take just before
713 the epilog loop. Here we check that the access function of the loop IVs
714 and the expression that represents the loop bound are simple enough.
715 These restrictions will be relaxed in the future. */
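/* Editorial example (not part of the original sources): an IV updated as
   "i = i + step" with STEP invariant in LOOP has evolution part "step" and
   can later be advanced directly to "init + niters * step"; an IV whose step
   itself changes inside the loop (a chrec of degree >= 2) fails the checks
   below.  */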
717 bool
718 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
720 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
721 basic_block bb = loop->header;
722 gphi_iterator gsi;
724 /* Analyze phi functions of the loop header. */
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
728 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
730 tree evolution_part;
732 gphi *phi = gsi.phi ();
733 if (dump_enabled_p ())
735 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
736 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
739 /* Skip virtual phi's. The data dependences that are associated with
740 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
742 Skip reduction phis. */
743 if (!iv_phi_p (phi))
745 if (dump_enabled_p ())
746 dump_printf_loc (MSG_NOTE, vect_location,
747 "reduc or virtual phi. skip.\n");
748 continue;
751 /* Analyze the evolution function. */
753 evolution_part
754 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
755 if (evolution_part == NULL_TREE)
757 if (dump_enabled_p ())
758 dump_printf (MSG_MISSED_OPTIMIZATION,
759 "No access function or evolution.\n");
760 return false;
763 /* FORNOW: We do not transform initial conditions of IVs
764 which evolution functions are not invariants in the loop. */
766 if (!expr_invariant_in_loop_p (loop, evolution_part))
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770 "evolution not invariant in loop.\n");
771 return false;
774 /* FORNOW: We do not transform initial conditions of IVs
775 which evolution functions are a polynomial of degree >= 2. */
777 if (tree_is_chrec (evolution_part))
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
781 "evolution is chrec.\n");
782 return false;
786 return true;
790 /* Function vect_update_ivs_after_vectorizer.
792 "Advance" the induction variables of LOOP to the value they should take
793 after the execution of LOOP. This is currently necessary because the
794 vectorizer does not handle induction variables that are used after the
795 loop. Such a situation occurs when the last iterations of LOOP are
796 peeled, because:
797 1. We introduced new uses after LOOP for IVs that were not originally used
798 after LOOP: the IVs of LOOP are now used by an epilog loop.
799 2. LOOP is going to be vectorized; this means that it will iterate N/VF
800 times, whereas the loop IVs should be bumped N times.
802 Input:
803 - LOOP - a loop that is going to be vectorized. The last few iterations
804 of LOOP were peeled.
805 - NITERS - the number of iterations that LOOP executes (before it is
806 vectorized). i.e, the number of times the ivs should be bumped.
807 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
808 coming out from LOOP on which there are uses of the LOOP ivs
809 (this is the path from LOOP->exit to epilog_loop->preheader).
811 The new definitions of the ivs are placed in LOOP->exit.
812 The phi args associated with the edge UPDATE_E in the bb
813 UPDATE_E->dest are updated accordingly.
815 Assumption 1: Like the rest of the vectorizer, this function assumes
816 a single loop exit that has a single predecessor.
818 Assumption 2: The phi nodes in the LOOP header and in update_bb are
819 organized in the same order.
821 Assumption 3: The access function of the ivs is simple enough (see
822 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
824 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
825 coming out of LOOP on which the ivs of LOOP are used (this is the path
826 that leads to the epilog loop; other paths skip the epilog loop). This
827 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
828 needs to have its phis updated. */
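/* Editorial worked example (not from the original sources): for an IV with
   initial value INIT and loop-invariant step STEP, its value after NITERS
   scalar iterations is NI = INIT + NITERS * STEP; that is the expression
   materialized in the exit bb below and used as the PHI argument on
   UPDATE_E.  */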
831 static void
832 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
833 tree niters, edge update_e)
835 gphi_iterator gsi, gsi1;
836 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
837 basic_block update_bb = update_e->dest;
838 basic_block exit_bb = single_exit (loop)->dest;
840 /* Make sure there exists a single-predecessor exit bb: */
841 gcc_assert (single_pred_p (exit_bb));
842 gcc_assert (single_succ_edge (exit_bb) == update_e);
844 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
845 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
846 gsi_next (&gsi), gsi_next (&gsi1))
848 tree init_expr;
849 tree step_expr, off;
850 tree type;
851 tree var, ni, ni_name;
852 gimple_stmt_iterator last_gsi;
854 gphi *phi = gsi.phi ();
855 gphi *phi1 = gsi1.phi ();
856 if (dump_enabled_p ())
858 dump_printf_loc (MSG_NOTE, vect_location,
859 "vect_update_ivs_after_vectorizer: phi: ");
860 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
863 /* Skip reduction and virtual phis. */
864 if (!iv_phi_p (phi))
866 if (dump_enabled_p ())
867 dump_printf_loc (MSG_NOTE, vect_location,
868 "reduc or virtual phi. skip.\n");
869 continue;
872 type = TREE_TYPE (gimple_phi_result (phi));
873 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
874 step_expr = unshare_expr (step_expr);
876 /* FORNOW: We do not support IVs whose evolution function is a polynomial
877 of degree >= 2 or exponential. */
878 gcc_assert (!tree_is_chrec (step_expr));
880 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
882 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
883 fold_convert (TREE_TYPE (step_expr), niters),
884 step_expr);
885 if (POINTER_TYPE_P (type))
886 ni = fold_build_pointer_plus (init_expr, off);
887 else
888 ni = fold_build2 (PLUS_EXPR, type,
889 init_expr, fold_convert (type, off));
891 var = create_tmp_var (type, "tmp");
893 last_gsi = gsi_last_bb (exit_bb);
894 gimple_seq new_stmts = NULL;
895 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
896 /* Exit_bb shouldn't be empty. */
897 if (!gsi_end_p (last_gsi))
898 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
899 else
900 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
902 /* Fix phi expressions in the successor bb. */
903 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
907 /* Function vect_gen_prolog_loop_niters
909 Generate the number of iterations which should be peeled as prolog for the
910 loop represented by LOOP_VINFO. It is calculated as the misalignment of
911 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
912 As a result, after the execution of this loop, the data reference DR will
913 refer to an aligned location. The following computation is generated:
915 If the misalignment of DR is known at compile time:
916 addr_mis = DR_MISALIGNMENT (dr);
917 Else, compute address misalignment in bytes:
918 addr_mis = addr & (vectype_align - 1)
920 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
922 (elem_size = element type size; an element is the scalar element whose type
923 is the inner type of the vectype)
925 The computations will be emitted at the end of BB. We also compute and
926 store upper bound (included) of the result in BOUND.
928 When the step of the data-ref in the loop is not 1 (as in interleaved data
929 and SLP), the number of iterations of the prolog must be divided by the step
930 (which is equal to the size of interleaved group).
932 The above formulas assume that VF == number of elements in the vector. This
933 may not hold when there are multiple-types in the loop.
934 In this case, for some data-references in the loop the VF does not represent
935 the number of elements that fit in the vector. Therefore, instead of VF we
936 use TYPE_VECTOR_SUBPARTS. */
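/* Editorial worked example (numbers are illustrative only): for a V4SI
   vector type (nelements = 4, vectype_align = 16, elem_size = 4) and a start
   address with byte_misalign = 8, elem_misalign = 8 >> 2 = 2 and
   iters = (4 - 2) & 3 = 2, i.e. two scalar iterations are peeled so that the
   data reference becomes 16-byte aligned.  */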
938 static tree
939 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
940 basic_block bb, int *bound)
942 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
943 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
944 tree var;
945 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
946 gimple_seq stmts = NULL, new_stmts = NULL;
947 tree iters, iters_name;
948 gimple *dr_stmt = DR_STMT (dr);
949 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
950 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
951 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
952 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
954 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
956 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
958 if (dump_enabled_p ())
959 dump_printf_loc (MSG_NOTE, vect_location,
960 "known peeling = %d.\n", npeel);
962 iters = build_int_cst (niters_type, npeel);
963 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
965 else
967 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
968 tree offset = negative
969 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
970 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
971 &stmts, offset, loop);
972 tree type = unsigned_type_for (TREE_TYPE (start_addr));
973 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
974 HOST_WIDE_INT elem_size =
975 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
976 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
977 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
978 tree nelements_tree = build_int_cst (type, nelements);
979 tree byte_misalign;
980 tree elem_misalign;
982 /* Create: byte_misalign = addr & (vectype_align - 1) */
983 byte_misalign =
984 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
985 vectype_align_minus_1);
987 /* Create: elem_misalign = byte_misalign / element_size */
988 elem_misalign =
989 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
991 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
992 if (negative)
993 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
994 else
995 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
996 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
997 iters = fold_convert (niters_type, iters);
998 *bound = nelements - 1;
1001 if (dump_enabled_p ())
1003 dump_printf_loc (MSG_NOTE, vect_location,
1004 "niters for prolog loop: ");
1005 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1006 dump_printf (MSG_NOTE, "\n");
1009 var = create_tmp_var (niters_type, "prolog_loop_niters");
1010 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1012 if (new_stmts)
1013 gimple_seq_add_seq (&stmts, new_stmts);
1014 if (stmts)
1016 gcc_assert (single_succ_p (bb));
1017 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1018 if (gsi_end_p (gsi))
1019 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1020 else
1021 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1023 return iters_name;
1027 /* Function vect_update_init_of_dr
1029 NITERS iterations were peeled from LOOP. DR represents a data reference
1030 in LOOP. This function updates the information recorded in DR to
1031 account for the fact that the first NITERS iterations had already been
1032 executed. Specifically, it updates the OFFSET field of DR. */
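/* Editorial example (not from the original sources): for an int access with
   DR_STEP = 4, peeling NITERS iterations adds 4 * NITERS to DR_OFFSET, so
   subsequent address computations start just past the peeled iterations.  */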
1034 static void
1035 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1037 tree offset = DR_OFFSET (dr);
1039 niters = fold_build2 (MULT_EXPR, sizetype,
1040 fold_convert (sizetype, niters),
1041 fold_convert (sizetype, DR_STEP (dr)));
1042 offset = fold_build2 (PLUS_EXPR, sizetype,
1043 fold_convert (sizetype, offset), niters);
1044 DR_OFFSET (dr) = offset;
1048 /* Function vect_update_inits_of_drs
1050 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1051 This function updates the information recorded for the data references in
1052 the loop to account for the fact that the first NITERS iterations had
1053 already been executed. Specifically, it updates the initial_condition of
1054 the access_function of all the data_references in the loop. */
1056 static void
1057 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1059 unsigned int i;
1060 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061 struct data_reference *dr;
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "=== vect_update_inits_of_dr ===\n");
1067 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1068 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1070 gimple_seq seq;
1071 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1072 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1074 niters = fold_convert (sizetype, niters);
1075 niters = force_gimple_operand (niters, &seq, false, var);
1076 if (seq)
1078 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1079 gcc_assert (!new_bb);
1083 FOR_EACH_VEC_ELT (datarefs, i, dr)
1084 vect_update_init_of_dr (dr, niters);
1088 /* This function builds ni_name = number of iterations. Statements
1089 are emitted on the loop preheader edge. */
1091 tree
1092 vect_build_loop_niters (loop_vec_info loop_vinfo)
1094 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1095 if (TREE_CODE (ni) == INTEGER_CST)
1096 return ni;
1097 else
1099 tree ni_name, var;
1100 gimple_seq stmts = NULL;
1101 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1103 var = create_tmp_var (TREE_TYPE (ni), "niters");
1104 ni_name = force_gimple_operand (ni, &stmts, false, var);
1105 if (stmts)
1106 gsi_insert_seq_on_edge_immediate (pe, stmts);
1108 return ni_name;
1112 /* Calculate the number of iterations above which vectorized loop will be
1113 preferred than scalar loop. NITERS_PROLOG is the number of iterations
1114 of prolog loop. If it's integer const, the integer number is also passed
1115 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1116 number of iterations of prolog loop. VFM1 is vector factor minus one.
1117 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1118 (rather than vectorized) loop will be executed. This function stores
1119 upper bound (included) of the result in BOUND_SCALAR. */
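/* Editorial worked example (illustrative values): with VFM1 = 3 and
   BOUND_PROLOG = 3 the result is niters_prolog + 3 and *BOUND_SCALAR = 6;
   if CHECK_PROFITABILITY holds, TH = 10 and niters_prolog is not constant,
   the decremented threshold 9 exceeds 6, so the constant 9 is returned and
   recorded in *BOUND_SCALAR instead.  */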
1121 static tree
1122 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1123 int bound_prolog, int vfm1, int th,
1124 int *bound_scalar, bool check_profitability)
1126 tree type = TREE_TYPE (niters_prolog);
1127 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1128 build_int_cst (type, vfm1));
1130 *bound_scalar = vfm1 + bound_prolog;
1131 if (check_profitability)
1133 /* TH indicates the minimum niters of vectorized loop, while we
1134 compute the maximum niters of scalar loop. */
1135 th--;
1136 /* Peeling for constant times. */
1137 if (int_niters_prolog >= 0)
1139 *bound_scalar = (int_niters_prolog + vfm1 < th
1140 ? th
1141 : vfm1 + int_niters_prolog);
1142 return build_int_cst (type, *bound_scalar);
1144 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1145 bound (included) of niters of the prolog loop. */
1146 if (th >= vfm1 + bound_prolog)
1148 *bound_scalar = th;
1149 return build_int_cst (type, th);
1151 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1152 else if (th > vfm1)
1153 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1155 return niters;
1158 /* This function generates the following statements:
1160 niters = number of iterations loop executes (after peeling)
1161 niters_vector = niters / vf
1163 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1164 true if NITERS doesn't overflow. */
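/* Editorial worked example (illustrative values): with vf = 4 and
   niters = 17, the no-overflow form computes 17 >> 2 = 4, and the
   overflow-safe form computes ((17 - 4) >> 2) + 1 = 4 as well, relying on
   the vector loop executing at least once.  */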
1166 void
1167 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1168 tree *niters_vector_ptr, bool niters_no_overflow)
1170 tree ni_minus_gap, var;
1171 tree niters_vector;
1172 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1174 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1176 /* If epilogue loop is required because of data accesses with gaps, we
1177 subtract one iteration from the total number of iterations here for
1178 correct calculation of RATIO. */
1179 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1181 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1182 niters,
1183 build_one_cst (TREE_TYPE (niters)));
1184 if (!is_gimple_val (ni_minus_gap))
1186 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1187 gimple *stmts = NULL;
1188 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1189 true, var);
1190 gsi_insert_seq_on_edge_immediate (pe, stmts);
1193 else
1194 ni_minus_gap = niters;
1196 /* Create: niters >> log2(vf) */
1197 /* If it's known that niters == number of latch executions + 1 doesn't
1198 overflow, we can generate niters >> log2(vf); otherwise we generate
1199 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1200 will be at least one. */
1201 if (niters_no_overflow)
1202 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1203 ni_minus_gap, log_vf);
1204 else
1205 niters_vector
1206 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1207 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1208 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1209 ni_minus_gap,
1210 build_int_cst
1211 (TREE_TYPE (niters), vf)),
1212 log_vf),
1213 build_int_cst (TREE_TYPE (niters), 1));
1215 if (!is_gimple_val (niters_vector))
1217 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1218 gimple *stmts = NULL;
1219 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1220 gsi_insert_seq_on_edge_immediate (pe, stmts);
1222 *niters_vector_ptr = niters_vector;
1224 return;
1227 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1228 loop specified by LOOP_VINFO after vectorization, compute the number
1229 of iterations before vectorization (niters_vector * vf) and store it
1230 to NITERS_VECTOR_MULT_VF_PTR. */
1232 static void
1233 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1234 tree niters_vector,
1235 tree *niters_vector_mult_vf_ptr)
1237 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1239 tree type = TREE_TYPE (niters_vector);
1240 tree log_vf = build_int_cst (type, exact_log2 (vf));
1241 basic_block exit_bb = single_exit (loop)->dest;
1243 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1244 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1245 niters_vector, log_vf);
1246 if (!is_gimple_val (niters_vector_mult_vf))
1248 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1249 gimple_seq stmts = NULL;
1250 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1251 &stmts, true, var);
1252 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1253 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1255 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1258 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1259 from SECOND/FIRST and puts it at the original loop's preheader/exit
1260 edge; the two loops are arranged as below:
1262 preheader_a:
1263 first_loop:
1264 header_a:
1265 i_1 = PHI<i_0, i_2>;
1267 i_2 = i_1 + 1;
1268 if (cond_a)
1269 goto latch_a;
1270 else
1271 goto between_bb;
1272 latch_a:
1273 goto header_a;
1275 between_bb:
1276 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1278 second_loop:
1279 header_b:
1280 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1281 or with i_2 if no LCSSA phi is created
1282 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1284 i_4 = i_3 + 1;
1285 if (cond_b)
1286 goto latch_b;
1287 else
1288 goto exit_bb;
1289 latch_b:
1290 goto header_b;
1292 exit_bb:
1294 This function creates loop closed SSA for the first loop; it updates the
1295 second loop's PHI nodes by replacing the argument on the incoming edge
1296 with the result of the newly created lcssa PHI nodes. If
1297 CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
1298 created for non-iv phis of the first loop.
1300 This function assumes the exit bb of the first loop is the preheader bb
1301 of the second loop, i.e., between_bb in the example code. With PHIs
1302 updated, the second loop will execute the remaining iterations of the
first. */
1304 static void
1305 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1306 struct loop *first, struct loop *second,
1307 bool create_lcssa_for_iv_phis)
1309 gphi_iterator gsi_update, gsi_orig;
1310 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1312 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1313 edge second_preheader_e = loop_preheader_edge (second);
1314 basic_block between_bb = single_exit (first)->dest;
1316 gcc_assert (between_bb == second_preheader_e->src);
1317 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1318 /* Either the first loop or the second is the loop to be vectorized. */
1319 gcc_assert (loop == first || loop == second);
1321 for (gsi_orig = gsi_start_phis (first->header),
1322 gsi_update = gsi_start_phis (second->header);
1323 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1324 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1326 gphi *orig_phi = gsi_orig.phi ();
1327 gphi *update_phi = gsi_update.phi ();
1329 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1330 /* Generate lcssa PHI node for the first loop. */
1331 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1332 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1334 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1335 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1336 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1337 arg = new_res;
1340 /* Update PHI node in the second loop by replacing arg on the loop's
1341 incoming edge. */
1342 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1346 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1347 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1348 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1349 appear like below:
1351 guard_bb:
1352 if (cond)
1353 goto merge_bb;
1354 else
1355 goto skip_loop;
1357 skip_loop:
1358 header_a:
1359 i_1 = PHI<i_0, i_2>;
1361 i_2 = i_1 + 1;
1362 if (cond_a)
1363 goto latch_a;
1364 else
1365 goto exit_a;
1366 latch_a:
1367 goto header_a;
1369 exit_a:
1370 i_5 = PHI<i_2>;
1372 merge_bb:
1373 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1375 update_loop:
1376 header_b:
1377 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1379 i_4 = i_3 + 1;
1380 if (cond_b)
1381 goto latch_b;
1382 else
1383 goto exit_bb;
1384 latch_b:
1385 goto header_b;
1387 exit_bb:
1389 This function creates PHI nodes at merge_bb and replaces the use of i_5
1390 in the update_loop's PHI node with the new PHI result. */
1392 static void
1393 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1394 struct loop *update_loop,
1395 edge guard_edge, edge merge_edge)
1397 source_location merge_loc, guard_loc;
1398 edge orig_e = loop_preheader_edge (skip_loop);
1399 edge update_e = loop_preheader_edge (update_loop);
1400 gphi_iterator gsi_orig, gsi_update;
1402 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1403 gsi_update = gsi_start_phis (update_loop->header));
1404 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1405 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1407 gphi *orig_phi = gsi_orig.phi ();
1408 gphi *update_phi = gsi_update.phi ();
1410 /* Generate new phi node at merge bb of the guard. */
1411 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1412 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1414 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1415 args in NEW_PHI for these edges. */
1416 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1417 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1418 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1419 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1420 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1421 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1423 /* Update phi in UPDATE_PHI. */
1424 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1428 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1429 this function searches for the corresponding lcssa phi node in exit
1430 bb of LOOP. If it is found, return the phi result; otherwise return
1431 NULL. */
1433 static tree
1434 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1435 gphi *lcssa_phi)
1437 gphi_iterator gsi;
1438 edge e = single_exit (loop);
1440 gcc_assert (single_pred_p (e->dest));
1441 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1443 gphi *phi = gsi.phi ();
1444 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1445 PHI_ARG_DEF (lcssa_phi, 0), 0))
1446 return PHI_RESULT (phi);
1448 return NULL_TREE;
1451 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1452 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1453 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1454 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1455 The CFG looks like:
1457 loop:
1458 header_a:
1459 i_1 = PHI<i_0, i_2>;
1461 i_2 = i_1 + 1;
1462 if (cond_a)
1463 goto latch_a;
1464 else
1465 goto exit_a;
1466 latch_a:
1467 goto header_a;
1469 exit_a:
1471 guard_bb:
1472 if (cond)
1473 goto merge_bb;
1474 else
1475 goto epilog_loop;
1477 ;; fall_through_bb
1479 epilog_loop:
1480 header_b:
1481 i_3 = PHI<i_2, i_4>;
1483 i_4 = i_3 + 1;
1484 if (cond_b)
1485 goto latch_b;
1486 else
1487 goto merge_bb;
1488 latch_b:
1489 goto header_b;
1491 merge_bb:
1492 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1494 exit_bb:
1495 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1497 For each name used outside EPILOG (i.e. for each name that has a lcssa
1498 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1499 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1500 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1501 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1502 in exit_bb will also be updated. */
1504 static void
1505 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1506 edge guard_edge, edge merge_edge)
1508 gphi_iterator gsi;
1509 basic_block merge_bb = guard_edge->dest;
1511 gcc_assert (single_succ_p (merge_bb));
1512 edge e = single_succ_edge (merge_bb);
1513 basic_block exit_bb = e->dest;
1514 gcc_assert (single_pred_p (exit_bb));
1515 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1517 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1519 gphi *update_phi = gsi.phi ();
1520 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1521 /* This loop-closed-phi actually doesn't represent a use out of the
1522 loop - the phi arg is a constant. */
1523 if (TREE_CODE (old_arg) != SSA_NAME)
1524 continue;
1526 tree merge_arg = get_current_def (old_arg);
1527 if (!merge_arg)
1528 merge_arg = old_arg;
1530 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1531 /* If the var is live after loop but not a reduction, we simply
1532 use the old arg. */
1533 if (!guard_arg)
1534 guard_arg = old_arg;
1536 /* Create new phi node in MERGE_BB: */
1537 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1538 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1540 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
1541 the two PHI args in merge_phi for these edges. */
1542 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1543 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1545 /* Update the original phi in exit_bb. */
1546 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1550 /* EPILOG loop is duplicated from the original loop for vectorizing,
1551 the arg of its loop closed ssa PHI needs to be updated. */
1553 static void
1554 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1556 gphi_iterator gsi;
1557 basic_block exit_bb = single_exit (epilog)->dest;
1559 gcc_assert (single_pred_p (exit_bb));
1560 edge e = EDGE_PRED (exit_bb, 0);
1561 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1562 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1565 /* Function vect_do_peeling.
1567 Input:
1568 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1570 preheader:
1571 LOOP:
1572 header_bb:
1573 loop_body
1574 if (exit_loop_cond) goto exit_bb
1575 else goto header_bb
1576 exit_bb:
1578 - NITERS: The number of iterations of the loop.
1579 - NITERSM1: The number of iterations of the loop's latch.
1580 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1581 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1582 CHECK_PROFITABILITY is true.
1583 Output:
1584 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1586 This function peels prolog and epilog from the loop, adds guards skipping
1587 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1588 would look like:
1590 guard_bb_1:
1591 if (prefer_scalar_loop) goto merge_bb_1
1592 else goto guard_bb_2
1594 guard_bb_2:
1595 if (skip_prolog) goto merge_bb_2
1596 else goto prolog_preheader
1598 prolog_preheader:
1599 PROLOG:
1600 prolog_header_bb:
1601 prolog_body
1602 if (exit_prolog_cond) goto prolog_exit_bb
1603 else goto prolog_header_bb
1604 prolog_exit_bb:
1606 merge_bb_2:
1608 vector_preheader:
1609 VECTOR LOOP:
1610 vector_header_bb:
1611 vector_body
1612 if (exit_vector_cond) goto vector_exit_bb
1613 else goto vector_header_bb
1614 vector_exit_bb:
1616 guard_bb_3:
1617 if (skip_epilog) goto merge_bb_3
1618 else goto epilog_preheader
1620 merge_bb_1:
1622 epilog_preheader:
1623 EPILOG:
1624 epilog_header_bb:
1625 epilog_body
1626 if (exit_epilog_cond) goto merge_bb_3
1627 else goto epilog_header_bb
1629 merge_bb_3:
1631 Note this function peels the prolog and epilog, and adds the guards,
1632 only when necessary.
1633 Returns created epilogue or NULL.
1635 TODO: Guard for prefer_scalar_loop should be emitted along with
1636 versioning conditions if loop versioning is needed. */
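/* Editorial note (not part of the original sources): in the code below,
   guard_bb_1 corresponds to skip_vector (scalar loop preferred when the
   iteration count is unknown), guard_bb_2 to skip_prolog (the prolog turns
   out to need zero iterations at run time) and guard_bb_3 to skip_epilog
   (a run-time check whether any iterations are left for the epilogue).  */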
1639 struct loop *
1640 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1641 tree *niters_vector, int th, bool check_profitability,
1642 bool niters_no_overflow)
1644 edge e, guard_e;
1645 tree type = TREE_TYPE (niters), guard_cond;
1646 basic_block guard_bb, guard_to;
1647 int prob_prolog, prob_vector, prob_epilog;
1648 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1649 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1650 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1651 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1652 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1654 if (!prolog_peeling && !epilog_peeling)
1655 return NULL;
1657 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1658 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1659 vf = 3;
1660 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1661 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1663 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1664 struct loop *first_loop = loop;
1665 create_lcssa_for_virtual_phi (loop);
1666 update_ssa (TODO_update_ssa_only_virtuals);
1668 if (MAY_HAVE_DEBUG_STMTS)
1670 gcc_assert (!adjust_vec.exists ());
1671 adjust_vec.create (32);
1673 initialize_original_copy_tables ();
1675 /* Prolog loop may be skipped. */
1676 bool skip_prolog = (prolog_peeling != 0);
1677 /* Skip to epilog if scalar loop may be preferred. It's only used when
1678 we peel for epilog loop. */
1679 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1680 /* Epilog loop must be executed if the number of iterations for epilog
1681 loop is known at compile time, otherwise we need to add a check at
1682 the end of vector loop and skip to the end of epilog loop. */
1683 bool skip_epilog = (prolog_peeling < 0
1684 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1685 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1686 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1687 skip_epilog = false;
1689 /* Record the anchor bb at which guard should be placed if scalar loop
1690 may be preferred. */
1691 basic_block anchor = loop_preheader_edge (loop)->src;
1692 if (skip_vector)
1694 split_edge (loop_preheader_edge (loop));
1696 /* Due to the order in which we peel prolog and epilog, we first
1697 propagate probability to the whole loop. The purpose is to
1698 avoid adjusting probabilities of both prolog and vector loops
1699 separately. Note in this case, the probability of epilog loop
1700 needs to be scaled back later. */
1701 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1702 scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
1703 REG_BR_PROB_BASE);
1704 scale_loop_profile (loop, prob_vector, bound);
1707 tree niters_prolog = build_int_cst (type, 0);
1708 source_location loop_loc = find_loop_location (loop);
1709 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1710 if (prolog_peeling)
1712 e = loop_preheader_edge (loop);
1713 if (!slpeel_can_duplicate_loop_p (loop, e))
1715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1716 "loop can't be duplicated to preheader edge.\n");
1717 gcc_unreachable ();
1719 /* Peel prolog and put it on preheader edge of loop. */
1720 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1721 if (!prolog)
1723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1724 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1725 gcc_unreachable ();
1727 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1728 first_loop = prolog;
1729 reset_original_copy_tables ();
1731 /* Generate and update the number of iterations for prolog loop. */
1732 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1733 &bound_prolog);
1734 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1736 /* Skip the prolog loop. */
1737 if (skip_prolog)
1739 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1740 niters_prolog, build_int_cst (type, 0));
1741 guard_bb = loop_preheader_edge (prolog)->src;
1742 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1743 guard_to = split_edge (loop_preheader_edge (loop));
1744 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1745 guard_to, guard_bb,
1746 inverse_probability (prob_prolog));
1747 e = EDGE_PRED (guard_to, 0);
1748 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1749 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1751 scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
1752 REG_BR_PROB_BASE);
1753 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1755 /* Update init address of DRs. */
1756 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1757 /* Update niters for vector loop. */
1758 LOOP_VINFO_NITERS (loop_vinfo)
1759 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1760 LOOP_VINFO_NITERSM1 (loop_vinfo)
1761 = fold_build2 (MINUS_EXPR, type,
1762 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1763 niters = vect_build_loop_niters (loop_vinfo);
1765 /* Prolog iterates at most bound_prolog times, latch iterates at
1766 most bound_prolog - 1 times. */
1767 record_niter_bound (prolog, bound_prolog - 1, false, true);
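/* For instance (illustrative values): if bound_prolog is 3, say because at
   most VF - 1 iterations are peeled with VF == 4, the recorded latch bound
   is 2.  */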
1768 delete_update_ssa ();
1769 adjust_vec_debug_stmts ();
1770 scev_reset ();
1773 if (epilog_peeling)
1775 e = single_exit (loop);
1776 if (!slpeel_can_duplicate_loop_p (loop, e))
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1779 "loop can't be duplicated to exit edge.\n");
1780 gcc_unreachable ();
1782 /* Peel epilog and put it on exit edge of loop. */
1783 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1784 if (!epilog)
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1787 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1788 gcc_unreachable ();
1790 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1792 /* The scalar version of the loop may be preferred. In this case, add
1793 a guard and skip to the epilog. Note this only happens when the
1794 number of iterations of the loop is unknown at compile time,
1795 otherwise this won't be vectorized. */
1796 if (skip_vector)
1798 /* An additional epilogue iteration is peeled if a gap exists. */
1799 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1800 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1801 bound_prolog,
1802 peel_for_gaps ? vf : vf - 1,
1803 th, &bound_scalar,
1804 check_profitability);
1805 /* Build guard against NITERSM1 since NITERS may overflow. */
1806 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1807 guard_bb = anchor;
1808 guard_to = split_edge (loop_preheader_edge (epilog));
1809 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1810 guard_to, guard_bb,
1811 inverse_probability (prob_vector));
1812 e = EDGE_PRED (guard_to, 0);
1813 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1814 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1816 /* Simply propagate profile info from guard_bb to guard_to which is
1817 a merge point of control flow. */
1818 guard_to->frequency = guard_bb->frequency;
1819 guard_to->count = guard_bb->count;
1820 single_succ_edge (guard_to)->count = guard_to->count;
1821 /* Scale probability of epilog loop back. */
1822 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
1823 scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
1826 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1827 tree niters_vector_mult_vf;
1828 /* If the loop is peeled a non-zero constant number of times, niters now
1829 refers to orig_niters - prolog_peeling; it won't overflow even if
1830 orig_niters overflows. */
1831 niters_no_overflow |= (prolog_peeling > 0);
1832 vect_gen_vector_loop_niters (loop_vinfo, niters,
1833 niters_vector, niters_no_overflow);
1834 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1835 &niters_vector_mult_vf);
1836 /* Update IVs of original loop as if they were advanced by
1837 niters_vector_mult_vf steps. */
1838 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1839 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1840 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1841 update_e);
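/* A minimal sketch of the effect (hypothetical IV, not from the source):
   for an induction variable i = {0, +, 1}, the value made available on
   UPDATE_E becomes niters_vector_mult_vf, so the epilog resumes counting
   right after the iterations the vector loop has already executed.  */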
1843 if (skip_epilog)
1845 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1846 niters, niters_vector_mult_vf);
1847 guard_bb = single_exit (loop)->dest;
1848 guard_to = split_edge (single_exit (epilog));
1849 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1850 skip_vector ? anchor : guard_bb,
1851 inverse_probability (prob_epilog));
1852 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1853 single_exit (epilog));
1854 /* Only need to handle basic block before epilog loop if it's not
1855 the guard_bb, which is the case when skip_vector is true. */
1856 if (guard_bb != bb_before_epilog)
1858 prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
1859 + inverse_probability (prob_vector));
1861 scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
1862 REG_BR_PROB_BASE);
1864 scale_loop_profile (epilog, prob_epilog, bound);
1866 else
1867 slpeel_update_phi_nodes_for_lcssa (epilog);
1869 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1870 /* The epilog loop is shared with the scalar version of the loop. */
1871 bound = MAX (bound, bound_scalar - 1);
1872 record_niter_bound (epilog, bound, false, true);
1874 delete_update_ssa ();
1875 adjust_vec_debug_stmts ();
1876 scev_reset ();
1878 adjust_vec.release ();
1879 free_original_copy_tables ();
1881 return epilog;
1884 /* Function vect_create_cond_for_niters_checks.
1886 Create a conditional expression that represents the run-time checks for
1887 the loop's niter. The loop is guaranteed to terminate if the run-time
1888 checks hold.
1890 Input:
1891 COND_EXPR - input conditional expression. New conditions will be chained
1892 with logical AND operation. If it is NULL, the niter
1893 assumptions become the sole condition.
1894 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
1895 assumptions to be checked.
1897 Output:
1898 COND_EXPR - conditional expression.
1900 The returned COND_EXPR is the conditional expression to be used in the
1901 if statement that controls which version of the loop gets executed at
1902 runtime. */
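/* An illustrative sketch (assumed caller state, not from the source): if
   *COND_EXPR already holds a profitability check such as niters > th, the
   chained result is roughly

     niters > th && <LOOP_VINFO_NITERS_ASSUMPTIONS>  */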
1904 static void
1905 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1907 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1909 if (*cond_expr)
1910 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1911 *cond_expr, part_cond_expr);
1912 else
1913 *cond_expr = part_cond_expr;
1916 /* Function vect_create_cond_for_align_checks.
1918 Create a conditional expression that represents the alignment checks for
1919 all of the data references (array element references) whose alignment
1920 must be checked at runtime.
1922 Input:
1923 COND_EXPR - input conditional expression. New conditions will be chained
1924 with logical AND operation.
1925 LOOP_VINFO - two fields of the loop information are used.
1926 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1927 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1929 Output:
1930 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1931 expression.
1932 The returned value is the conditional expression to be used in the if
1933 statement that controls which version of the loop gets executed at runtime.
1935 The algorithm makes two assumptions:
1936 1) The number of bytes "n" in a vector is a power of 2.
1937 2) An address "a" is aligned if a%n is zero, so the
1938 test can be done as a&(n-1) == 0. For example, for 16
1939 byte vectors the test is a&0xf == 0. */
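/* An illustrative sketch of the generated sequence (hypothetical refs a and
   b with 16-byte vectors, so mask == 0xf; not from the source):

     addr2int0 = (ptr-sized int) &a[i0];
     addr2int1 = (ptr-sized int) &b[j0];
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;
     cond      = andmask == 0;   // true iff both addresses are 16-byte aligned
 */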
1941 static void
1942 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1943 tree *cond_expr,
1944 gimple_seq *cond_expr_stmt_list)
1946 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1947 vec<gimple *> may_misalign_stmts
1948 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1949 gimple *ref_stmt;
1950 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1951 tree mask_cst;
1952 unsigned int i;
1953 tree int_ptrsize_type;
1954 char tmp_name[20];
1955 tree or_tmp_name = NULL_TREE;
1956 tree and_tmp_name;
1957 gimple *and_stmt;
1958 tree ptrsize_zero;
1959 tree part_cond_expr;
1961 /* Check that mask is one less than a power of 2, i.e., mask is
1962 all zeros followed by all ones. */
1963 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1965 int_ptrsize_type = signed_type_for (ptr_type_node);
1967 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1968 of the first vector of the i'th data reference. */
1970 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1972 gimple_seq new_stmt_list = NULL;
1973 tree addr_base;
1974 tree addr_tmp_name;
1975 tree new_or_tmp_name;
1976 gimple *addr_stmt, *or_stmt;
1977 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1978 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1979 bool negative = tree_int_cst_compare
1980 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1981 tree offset = negative
1982 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1984 /* create: addr_tmp = (int)(address_of_first_vector) */
1985 addr_base =
1986 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1987 offset, loop);
1988 if (new_stmt_list != NULL)
1989 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1991 sprintf (tmp_name, "addr2int%d", i);
1992 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1993 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1994 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1996 /* The addresses are ORed together. */
1998 if (or_tmp_name != NULL_TREE)
2000 /* create: or_tmp = or_tmp | addr_tmp */
2001 sprintf (tmp_name, "orptrs%d", i);
2002 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2003 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2004 or_tmp_name, addr_tmp_name);
2005 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2006 or_tmp_name = new_or_tmp_name;
2008 else
2009 or_tmp_name = addr_tmp_name;
2011 } /* end for i */
2013 mask_cst = build_int_cst (int_ptrsize_type, mask);
2015 /* create: and_tmp = or_tmp & mask */
2016 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2018 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2019 or_tmp_name, mask_cst);
2020 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2022 /* Make and_tmp the left operand of the conditional test against zero.
2023 If and_tmp has a nonzero bit then some address is unaligned. */
2024 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2025 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2026 and_tmp_name, ptrsize_zero);
2027 if (*cond_expr)
2028 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2029 *cond_expr, part_cond_expr);
2030 else
2031 *cond_expr = part_cond_expr;
2034 /* Given two data references and segment lengths described by DR_A and DR_B,
2035 create an expression checking if the two address ranges intersect with
2036 each other based on the index of the two addresses. This can only be done
2037 if DR_A and DR_B refer to the same (array) object and the index is
2038 the only difference. For example:
2040 DR_A DR_B
2041 data-ref arr[i] arr[j]
2042 base_object arr arr
2043 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2045 The addresses and their index are like:
2047 |<- ADDR_A ->| |<- ADDR_B ->|
2048 ------------------------------------------------------->
2049 | | | | | | | | | |
2050 ------------------------------------------------------->
2051 i_0 ... i_0+4 j_0 ... j_0+4
2053 We can create an expression based on the index rather than the address:
2055 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2057 Note the evolution step of the index needs to be considered in the comparison. */
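/* A sketch of the condition built below (illustrative form): with
   idx_len = niter_len * idx_step and min1/min2 the CHREC_LEFT of the two
   access functions, the check is

     min1 + idx_len1 <= min2 || min2 + idx_len2 <= min1

   with the ranges adjusted when the step is negative.  */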
2059 static bool
2060 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2061 const dr_with_seg_len& dr_a,
2062 const dr_with_seg_len& dr_b)
2064 if (integer_zerop (DR_STEP (dr_a.dr))
2065 || integer_zerop (DR_STEP (dr_b.dr))
2066 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2067 return false;
2069 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2070 return false;
2072 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2073 return false;
2075 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2076 return false;
2078 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2079 return false;
2081 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2083 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2084 unsigned HOST_WIDE_INT abs_step
2085 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2087 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2088 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2089 /* Infer the number of iterations with which the memory segment is accessed
2090 by DR. In other words, aliasing is checked if the memory segment accessed
2091 by DR_A in some iterations intersects with the memory segment accessed by
2092 DR_B in the same number of iterations.
2093 Note the segment length is a linear function of the number of iterations
2094 with DR_STEP as the coefficient. */
2095 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2096 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
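/* For example (illustrative numbers): a segment length of 64 bytes with
   abs_step == 16 gives niter_len == (64 + 15) / 16 == 4 iterations.  */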
2098 unsigned int i;
2099 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2100 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2102 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2103 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2104 /* The two indices must be the same if they are not SCEVs, or are not
2105 SCEVs with respect to the current loop being vectorized. */
2106 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2107 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2108 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2109 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2111 if (operand_equal_p (access1, access2, 0))
2112 continue;
2114 return false;
2116 /* The two indices must have the same step. */
2117 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2118 return false;
2120 tree idx_step = CHREC_RIGHT (access1);
2121 /* The index must have a constant step, otherwise DR_STEP wouldn't be constant. */
2122 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2123 /* The index must evaluate in the same direction as DR. */
2124 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2126 tree min1 = CHREC_LEFT (access1);
2127 tree min2 = CHREC_LEFT (access2);
2128 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2129 return false;
2131 /* Ideally, aliasing could be checked against the loop's control IV, but
2132 we would need to prove a linear mapping between the control IV and the
2133 reference index. Although that should be true, we check against the
2134 (array) index of the data reference instead. Like the segment length,
2135 the index length is a linear function of the number of iterations with
2136 idx_step as the coefficient, i.e., niter_len * idx_step. */
2137 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2138 build_int_cst (TREE_TYPE (min1),
2139 niter_len1));
2140 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2141 build_int_cst (TREE_TYPE (min2),
2142 niter_len2));
2143 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2144 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2145 /* Adjust ranges for negative step. */
2146 if (neg_step)
2148 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2149 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2150 CHREC_LEFT (access1), idx_step);
2151 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2152 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2153 CHREC_LEFT (access2), idx_step);
2155 tree part_cond_expr
2156 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2157 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2158 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2159 if (*cond_expr)
2160 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2161 *cond_expr, part_cond_expr);
2162 else
2163 *cond_expr = part_cond_expr;
2165 return true;
2168 /* Given two data references and segment lengths described by DR_A and DR_B,
2169 create an expression checking if the two address ranges intersect with
2170 each other:
2172 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2173 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
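/* For example (an assumed instance, not from the source), with two 16-byte
   segments the emitted check is roughly

     addr_a + 16 <= addr_b || addr_b + 16 <= addr_a

   i.e. the two segments do not overlap.  */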
2175 static void
2176 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2177 const dr_with_seg_len& dr_a,
2178 const dr_with_seg_len& dr_b)
2180 *cond_expr = NULL_TREE;
2181 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2182 return;
2184 tree segment_length_a = dr_a.seg_len;
2185 tree segment_length_b = dr_b.seg_len;
2186 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2187 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2188 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2190 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2191 offset_a, DR_INIT (dr_a.dr));
2192 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2193 offset_b, DR_INIT (dr_b.dr));
2194 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2195 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2197 tree seg_a_min = addr_base_a;
2198 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2199 /* For a negative step, we need to adjust the address range by TYPE_SIZE_UNIT
2200 bytes, e.g., for int accesses from a[3] down to a[1] the range is
2201 [a+4, a+16) instead of [a, a+12). */
2202 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2204 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2205 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2206 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2209 tree seg_b_min = addr_base_b;
2210 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2211 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2213 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2214 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2215 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2217 *cond_expr
2218 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2219 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2220 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2223 /* Function vect_create_cond_for_alias_checks.
2225 Create a conditional expression that represents the run-time checks for
2226 overlapping of address ranges represented by a list of data references
2227 relations passed as input.
2229 Input:
2230 COND_EXPR - input conditional expression. New conditions will be chained
2231 with logical AND operation. If it is NULL, then the function
2232 is used to return the number of alias checks.
2233 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2234 to be checked.
2236 Output:
2237 COND_EXPR - conditional expression.
2239 The returned COND_EXPR is the conditional expression to be used in the if
2240 statement that controls which version of the loop gets executed at runtime.
2243 void
2244 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2246 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2247 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2248 tree part_cond_expr;
2250 if (comp_alias_ddrs.is_empty ())
2251 return;
2253 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2255 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2256 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2258 if (dump_enabled_p ())
2260 dump_printf_loc (MSG_NOTE, vect_location,
2261 "create runtime check for data references ");
2262 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2263 dump_printf (MSG_NOTE, " and ");
2264 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2265 dump_printf (MSG_NOTE, "\n");
2268 /* Create a condition expression for each pair of data references. */
2269 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2270 if (*cond_expr)
2271 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2272 *cond_expr, part_cond_expr);
2273 else
2274 *cond_expr = part_cond_expr;
2277 if (dump_enabled_p ())
2278 dump_printf_loc (MSG_NOTE, vect_location,
2279 "created %u versioning for alias checks.\n",
2280 comp_alias_ddrs.length ());
2284 /* Function vect_loop_versioning.
2286 If the loop has data references that may or may not be aligned and/or
2287 has data reference relations whose independence was not proven, then
2288 two versions of the loop need to be generated, one which is vectorized
2289 and one which isn't. A test is then generated to control which of the
2290 loops is executed. The test checks for the alignment of all of the
2291 data references that may or may not be aligned. An additional
2292 sequence of runtime tests is generated for each pair of DDRs whose
2293 independence was not proven. The vectorized version of the loop is
2294 executed only if both the alias and alignment tests pass.
2296 The test generated to check which version of loop is executed
2297 is modified to also check for profitability as indicated by the
2298 cost model threshold TH.
2300 The versioning precondition(s) are placed in *COND_EXPR and
2301 *COND_EXPR_STMT_LIST. */
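/* A rough sketch of the combined precondition (illustrative; only the terms
   required for this loop are actually emitted):

     cond = niters > th              // profitability, if CHECK_PROFITABILITY
            && <niter assumptions>   // if versioning for niters
            && <alignment check>     // if versioning for alignment
            && <alias check>;        // if versioning for alias

   loop_version then splits the loop so that the vectorized copy runs only
   when this condition holds.  */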
2303 void
2304 vect_loop_versioning (loop_vec_info loop_vinfo,
2305 unsigned int th, bool check_profitability)
2307 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2308 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2309 basic_block condition_bb;
2310 gphi_iterator gsi;
2311 gimple_stmt_iterator cond_exp_gsi;
2312 basic_block merge_bb;
2313 basic_block new_exit_bb;
2314 edge new_exit_e, e;
2315 gphi *orig_phi, *new_phi;
2316 tree cond_expr = NULL_TREE;
2317 gimple_seq cond_expr_stmt_list = NULL;
2318 tree arg;
2319 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2320 gimple_seq gimplify_stmt_list = NULL;
2321 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2322 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2323 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2324 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2326 if (check_profitability)
2327 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2328 build_int_cst (TREE_TYPE (scalar_loop_iters),
2329 th));
2331 if (version_niter)
2332 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2334 if (cond_expr)
2335 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2336 is_gimple_condexpr, NULL_TREE);
2338 if (version_align)
2339 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2340 &cond_expr_stmt_list);
2342 if (version_alias)
2343 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2345 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2346 is_gimple_condexpr, NULL_TREE);
2347 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2349 initialize_original_copy_tables ();
2350 if (scalar_loop)
2352 edge scalar_e;
2353 basic_block preheader, scalar_preheader;
2355 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2356 scale LOOP's frequencies instead. */
2357 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2358 prob, REG_BR_PROB_BASE - prob,
2359 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2360 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2361 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2362 while we need to move it above LOOP's preheader. */
2363 e = loop_preheader_edge (loop);
2364 scalar_e = loop_preheader_edge (scalar_loop);
2365 gcc_assert (empty_block_p (e->src)
2366 && single_pred_p (e->src));
2367 gcc_assert (empty_block_p (scalar_e->src)
2368 && single_pred_p (scalar_e->src));
2369 gcc_assert (single_pred_p (condition_bb));
2370 preheader = e->src;
2371 scalar_preheader = scalar_e->src;
2372 scalar_e = find_edge (condition_bb, scalar_preheader);
2373 e = single_pred_edge (preheader);
2374 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2375 scalar_preheader);
2376 redirect_edge_and_branch_force (scalar_e, preheader);
2377 redirect_edge_and_branch_force (e, condition_bb);
2378 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2379 single_pred (condition_bb));
2380 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2381 single_pred (scalar_preheader));
2382 set_immediate_dominator (CDI_DOMINATORS, preheader,
2383 condition_bb);
2385 else
2386 nloop = loop_version (loop, cond_expr, &condition_bb,
2387 prob, REG_BR_PROB_BASE - prob,
2388 prob, REG_BR_PROB_BASE - prob, true);
2390 if (version_niter)
2392 /* The versioned loop could be infinite; we need to clear the existing
2393 niter information, which was copied from the original loop. */
2394 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2395 vect_free_loop_info_assumptions (nloop);
2396 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2397 loop_constraint_set (loop, LOOP_C_INFINITE);
2400 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2401 && dump_enabled_p ())
2403 if (version_alias)
2404 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2405 "loop versioned for vectorization because of "
2406 "possible aliasing\n");
2407 if (version_align)
2408 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2409 "loop versioned for vectorization to enhance "
2410 "alignment\n");
2413 free_original_copy_tables ();
2415 /* Loop versioning violates an assumption we try to maintain during
2416 vectorization - that the loop exit block has a single predecessor.
2417 After versioning, the exit block of both loop versions is the same
2418 basic block (i.e. it has two predecessors). Just in order to simplify
2419 following transformations in the vectorizer, we fix this situation
2420 here by adding a new (empty) block on the exit-edge of the loop,
2421 with the proper loop-exit phis to maintain loop-closed-form.
2422 If loop versioning wasn't done from loop, but from scalar_loop instead,
2423 merge_bb will already have just a single predecessor. */
2425 merge_bb = single_exit (loop)->dest;
2426 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2428 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2429 new_exit_bb = split_edge (single_exit (loop));
2430 new_exit_e = single_exit (loop);
2431 e = EDGE_SUCC (new_exit_bb, 0);
2433 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2435 tree new_res;
2436 orig_phi = gsi.phi ();
2437 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2438 new_phi = create_phi_node (new_res, new_exit_bb);
2439 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2440 add_phi_arg (new_phi, arg, new_exit_e,
2441 gimple_phi_arg_location_from_edge (orig_phi, e));
2442 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2446 /* End loop-exit-fixes after versioning. */
2448 if (cond_expr_stmt_list)
2450 cond_exp_gsi = gsi_last_bb (condition_bb);
2451 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2452 GSI_SAME_STMT);
2454 update_ssa (TODO_update_ssa);