gcc/tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src))
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
111 if (outer_loop->inner->next)
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
120 else
121 continue;
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 struct adjust_info
133 tree from, to;
134 basic_block bb;
137 /* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
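   /* Illustrative note (editor's addition, not part of the original
      comment): if two substitutions x_1 -> x_2 and, scheduled later,
      x_1 -> x_3 both apply to debug uses of x_1, popping the stack LIFO
      applies the later (closer) x_1 -> x_3 to the uses it dominates
      first; the earlier x_1 -> x_2 then still finds the remaining uses
      of x_1.  Applied FIFO, x_1 -> x_2 could rewrite all such uses up
      front and the closer substitution would find nothing left to
      adjust.  */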
143 static vec<adjust_info, va_heap> adjust_vec;
145 /* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
156 gimple *stmt;
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
165 use_operand_p use_p;
166 basic_block bbuse;
168 if (!is_gimple_debug (stmt))
169 continue;
171 gcc_assert (gimple_debug_bind_p (stmt));
173 bbuse = gimple_bb (stmt);
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
192 /* Adjust debug stmts as scheduled before. */
194 static void
195 adjust_vec_debug_stmts (void)
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
200 gcc_assert (adjust_vec.exists ());
202 while (!adjust_vec.is_empty ())
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
209 /* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
217 adjust_info ai;
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
222 && ! virtual_operand_p (from))
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
230 else
231 adjust_debug_stmts_now (&ai);
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one and its limit is NITERS.
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
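/* Illustrative sketch (editor's addition, not in the original source):
   conceptually the transformation turns

       do { body; } while (orig_cond);

   into

       i = 0;
       do { body; i = i + 1; } while (i < NITERS);

   where the new exit test is emitted as "i >= NITERS" or "i < NITERS",
   depending on whether the exit edge is the true or the false edge of
   the old condition.  */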
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
260 tree indx_before_incr, indx_after_incr;
261 gcond *cond_stmt;
262 gcond *orig_cond;
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
269 source_location loop_loc;
270 enum tree_code code;
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
294 free_stmt_vec_info (orig_cond);
296 loop_loc = find_loop_location (loop);
297 if (dump_enabled_p ())
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For all PHI arguments in FROM->dest and TO->dest from those
 312 edges ensure that TO->dest PHI arguments have current_def set
 313 to that of the corresponding argument in FROM->dest. */
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
318 gimple_stmt_iterator gsi_from, gsi_to;
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
330 gsi_next (&gsi_from);
331 continue;
333 if (virtual_operand_p (to_arg))
335 gsi_next (&gsi_to);
336 continue;
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
357 /* Given LOOP this function generates a new copy of it and puts it
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
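/* Illustrative sketch (editor's addition): when E is the exit edge of
   LOOP the result is, roughly,

       preheader -> LOOP -> copy_preheader -> LOOP_COPY -> old exit dest

   and when E is the preheader edge the copy is placed in front instead:

       old preheader -> LOOP_COPY -> LOOP -> old exit dest

   Forwarder blocks created while rewiring the edges are partly cleaned
   up again below.  */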
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
367 struct loop *new_loop;
368 basic_block *new_bbs, *bbs, *pbbs;
369 bool at_exit;
370 bool was_imm_dom;
371 basic_block exit_dest;
372 edge exit, new_exit;
373 bool duplicate_outer_loop = false;
375 exit = single_exit (loop);
376 at_exit = (e == exit);
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
389 /* Check whether duplication is possible. */
390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
392 free (bbs);
393 return NULL;
396 /* Generate new loop structure. */
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
400 exit_dest = exit->dest;
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
403 true : false);
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409 edge entry_e = single_pred_edge (preheader);
410 bbs[0] = preheader;
411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 &exit, 1, &new_exit, NULL,
416 at_exit ? loop->latch : e->src, true);
417 exit = single_exit (loop);
418 basic_block new_preheader = new_bbs[0];
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
422 if (scalar_loop != loop)
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
433 EDGE_SUCC (loop->latch, 0));
436 if (at_exit) /* Add the loop copy at exit. */
438 if (scalar_loop != loop)
440 gphi_iterator gsi;
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
446 gphi *phi = gsi.phi ();
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457 if (was_imm_dom || duplicate_outer_loop)
458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
464 delete_basic_block (preheader);
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
468 else /* Add the copy at entry. */
470 if (scalar_loop != loop)
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
502 if (scalar_loop != loop)
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
506 gphi_iterator gsi_orig, gsi_new;
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
525 free (new_bbs);
526 free (bbs);
528 checking_verify_dominators (CDI_DOMINATORS);
530 return new_loop;
534 /* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
537 skip the loop. PROBABILITY is the skip edge's probability. Mark the
538 new edge as irreducible if IRREDUCIBLE_P is true. */
540 static edge
541 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
542 basic_block guard_to, basic_block dom_bb,
543 int probability, bool irreducible_p)
545 gimple_stmt_iterator gsi;
546 edge new_e, enter_e;
547 gcond *cond_stmt;
548 gimple_seq gimplify_stmt_list = NULL;
550 enter_e = EDGE_SUCC (guard_bb, 0);
551 enter_e->flags &= ~EDGE_FALLTHRU;
552 enter_e->flags |= EDGE_FALSE_VALUE;
553 gsi = gsi_last_bb (guard_bb);
555 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
556 NULL_TREE);
557 if (gimplify_stmt_list)
558 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
560 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
561 gsi = gsi_last_bb (guard_bb);
562 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
564 /* Add new edge to connect guard block to the merge/loop-exit block. */
565 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
567 new_e->count = guard_bb->count;
568 new_e->probability = probability;
569 new_e->count = apply_probability (enter_e->count, probability);
570 if (irreducible_p)
571 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
573 enter_e->count -= new_e->count;
574 enter_e->probability = inverse_probability (probability);
575 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
577 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
578 if (enter_e->dest->loop_father->header == enter_e->dest)
579 split_edge (enter_e);
581 return new_e;
585 /* This function verifies that the following restrictions apply to LOOP:
 586 (1) it consists of exactly 2 basic blocks - a header and an empty
 587 latch - for an innermost loop, and of 5 basic blocks for an outer loop.
588 (2) it is single entry, single exit
589 (3) its exit condition is the last stmt in the header
590 (4) E is the entry/exit edge of LOOP.
593 bool
594 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
596 edge exit_e = single_exit (loop);
597 edge entry_e = loop_preheader_edge (loop);
598 gcond *orig_cond = get_loop_exit_condition (loop);
599 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
600 unsigned int num_bb = loop->inner? 5 : 2;
602 /* All loops have an outer scope; the only case loop->outer is NULL is for
603 the function itself. */
604 if (!loop_outer (loop)
605 || loop->num_nodes != num_bb
606 || !empty_block_p (loop->latch)
607 || !single_exit (loop)
608 /* Verify that new loop exit condition can be trivially modified. */
609 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
610 || (e != exit_e && e != entry_e))
611 return false;
613 return true;
616 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
617 in the exit bb and rename all the uses after the loop. This simplifies
618 the *guard[12] routines, which assume loop closed SSA form for all PHIs
619 (but normally loop closed SSA form doesn't require virtual PHIs to be
 620 in the same form). Doing this early simplifies checking which
 621 uses should be renamed. */
623 static void
624 create_lcssa_for_virtual_phi (struct loop *loop)
626 gphi_iterator gsi;
627 edge exit_e = single_exit (loop);
629 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
630 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
632 gphi *phi = gsi.phi ();
633 for (gsi = gsi_start_phis (exit_e->dest);
634 !gsi_end_p (gsi); gsi_next (&gsi))
635 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
636 break;
637 if (gsi_end_p (gsi))
639 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
640 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
641 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
642 imm_use_iterator imm_iter;
643 gimple *stmt;
644 use_operand_p use_p;
646 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
647 gimple_phi_set_result (new_phi, new_vop);
648 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
649 if (stmt != new_phi
650 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
651 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
652 SET_USE (use_p, new_vop);
654 break;
 659 /* Function find_loop_location.
661 Extract the location of the loop in the source code.
662 If the loop is not well formed for vectorization, an estimated
663 location is calculated.
 664 Return the loop location on success and UNKNOWN_LOCATION if not. */
666 source_location
667 find_loop_location (struct loop *loop)
669 gimple *stmt = NULL;
670 basic_block bb;
671 gimple_stmt_iterator si;
673 if (!loop)
674 return UNKNOWN_LOCATION;
676 stmt = get_loop_exit_condition (loop);
678 if (stmt
679 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
680 return gimple_location (stmt);
682 /* If we got here the loop is probably not "well formed",
 683 try to estimate the loop location.  */
685 if (!loop->header)
686 return UNKNOWN_LOCATION;
688 bb = loop->header;
690 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
692 stmt = gsi_stmt (si);
693 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
694 return gimple_location (stmt);
697 return UNKNOWN_LOCATION;
700 /* Return true if PHI defines an IV of the loop to be vectorized. */
702 static bool
703 iv_phi_p (gphi *phi)
705 if (virtual_operand_p (PHI_RESULT (phi)))
706 return false;
708 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
709 gcc_assert (stmt_info != NULL);
710 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
711 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
712 return false;
714 return true;
717 /* Function vect_can_advance_ivs_p
719 In case the number of iterations that LOOP iterates is unknown at compile
720 time, an epilog loop will be generated, and the loop induction variables
721 (IVs) will be "advanced" to the value they are supposed to take just before
722 the epilog loop. Here we check that the access function of the loop IVs
723 and the expression that represents the loop bound are simple enough.
724 These restrictions will be relaxed in the future. */
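/* Illustrative note (editor's addition): an IV such as
       i_1 = PHI <0, i_2>;  i_2 = i_1 + 4;
   has evolution part "4", which is loop invariant and not itself a
   chrec, so it passes the checks below; an IV whose step is again an
   induction (a polynomial chrec, e.g. {1, +, 1}_1 as the step) is
   rejected.  */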
726 bool
727 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
729 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
730 basic_block bb = loop->header;
731 gphi_iterator gsi;
733 /* Analyze phi functions of the loop header. */
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
737 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
739 tree evolution_part;
741 gphi *phi = gsi.phi ();
742 if (dump_enabled_p ())
744 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
745 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
748 /* Skip virtual phi's. The data dependences that are associated with
749 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
751 Skip reduction phis. */
752 if (!iv_phi_p (phi))
754 if (dump_enabled_p ())
755 dump_printf_loc (MSG_NOTE, vect_location,
756 "reduc or virtual phi. skip.\n");
757 continue;
760 /* Analyze the evolution function. */
762 evolution_part
763 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
764 if (evolution_part == NULL_TREE)
766 if (dump_enabled_p ())
767 dump_printf (MSG_MISSED_OPTIMIZATION,
768 "No access function or evolution.\n");
769 return false;
772 /* FORNOW: We do not transform initial conditions of IVs
773 which evolution functions are not invariants in the loop. */
775 if (!expr_invariant_in_loop_p (loop, evolution_part))
777 if (dump_enabled_p ())
778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
779 "evolution not invariant in loop.\n");
780 return false;
783 /* FORNOW: We do not transform initial conditions of IVs
784 which evolution functions are a polynomial of degree >= 2. */
786 if (tree_is_chrec (evolution_part))
788 if (dump_enabled_p ())
789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
790 "evolution is chrec.\n");
791 return false;
795 return true;
799 /* Function vect_update_ivs_after_vectorizer.
801 "Advance" the induction variables of LOOP to the value they should take
802 after the execution of LOOP. This is currently necessary because the
803 vectorizer does not handle induction variables that are used after the
804 loop. Such a situation occurs when the last iterations of LOOP are
805 peeled, because:
806 1. We introduced new uses after LOOP for IVs that were not originally used
807 after LOOP: the IVs of LOOP are now used by an epilog loop.
808 2. LOOP is going to be vectorized; this means that it will iterate N/VF
809 times, whereas the loop IVs should be bumped N times.
811 Input:
812 - LOOP - a loop that is going to be vectorized. The last few iterations
813 of LOOP were peeled.
814 - NITERS - the number of iterations that LOOP executes (before it is
815 vectorized). i.e, the number of times the ivs should be bumped.
816 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
817 coming out from LOOP on which there are uses of the LOOP ivs
818 (this is the path from LOOP->exit to epilog_loop->preheader).
820 The new definitions of the ivs are placed in LOOP->exit.
821 The phi args associated with the edge UPDATE_E in the bb
822 UPDATE_E->dest are updated accordingly.
824 Assumption 1: Like the rest of the vectorizer, this function assumes
825 a single loop exit that has a single predecessor.
827 Assumption 2: The phi nodes in the LOOP header and in update_bb are
828 organized in the same order.
830 Assumption 3: The access function of the ivs is simple enough (see
831 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
833 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
834 coming out of LOOP on which the ivs of LOOP are used (this is the path
835 that leads to the epilog loop; other paths skip the epilog loop). This
836 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
837 needs to have its phis updated.
840 static void
841 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
842 tree niters, edge update_e)
844 gphi_iterator gsi, gsi1;
845 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
846 basic_block update_bb = update_e->dest;
847 basic_block exit_bb = single_exit (loop)->dest;
849 /* Make sure there exists a single-predecessor exit bb: */
850 gcc_assert (single_pred_p (exit_bb));
851 gcc_assert (single_succ_edge (exit_bb) == update_e);
853 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
854 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
855 gsi_next (&gsi), gsi_next (&gsi1))
857 tree init_expr;
858 tree step_expr, off;
859 tree type;
860 tree var, ni, ni_name;
861 gimple_stmt_iterator last_gsi;
863 gphi *phi = gsi.phi ();
864 gphi *phi1 = gsi1.phi ();
865 if (dump_enabled_p ())
867 dump_printf_loc (MSG_NOTE, vect_location,
868 "vect_update_ivs_after_vectorizer: phi: ");
869 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
872 /* Skip reduction and virtual phis. */
873 if (!iv_phi_p (phi))
875 if (dump_enabled_p ())
876 dump_printf_loc (MSG_NOTE, vect_location,
877 "reduc or virtual phi. skip.\n");
878 continue;
881 type = TREE_TYPE (gimple_phi_result (phi));
882 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
883 step_expr = unshare_expr (step_expr);
885 /* FORNOW: We do not support IVs whose evolution function is a polynomial
886 of degree >= 2 or exponential. */
887 gcc_assert (!tree_is_chrec (step_expr));
889 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
891 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
892 fold_convert (TREE_TYPE (step_expr), niters),
893 step_expr);
894 if (POINTER_TYPE_P (type))
895 ni = fold_build_pointer_plus (init_expr, off);
896 else
897 ni = fold_build2 (PLUS_EXPR, type,
898 init_expr, fold_convert (type, off));
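      /* Worked example (editor's addition, names are hypothetical): for
	 an integer IV with init_expr = n_0, step_expr = 4 and niters = 16
	 this computes off = 4 * 16 = 64 and ni = n_0 + 64, i.e. the value
	 the IV would have after the 16 peeled-off scalar iterations; for
	 a pointer IV the same offset is added with
	 fold_build_pointer_plus.  */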
900 var = create_tmp_var (type, "tmp");
902 last_gsi = gsi_last_bb (exit_bb);
903 gimple_seq new_stmts = NULL;
904 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
905 /* Exit_bb shouldn't be empty. */
906 if (!gsi_end_p (last_gsi))
907 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
908 else
909 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
911 /* Fix phi expressions in the successor bb. */
912 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
916 /* Function vect_gen_prolog_loop_niters
918 Generate the number of iterations which should be peeled as prolog for the
919 loop represented by LOOP_VINFO. It is calculated as the misalignment of
920 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
921 As a result, after the execution of this loop, the data reference DR will
922 refer to an aligned location. The following computation is generated:
924 If the misalignment of DR is known at compile time:
 925 addr_mis = DR_MISALIGNMENT (dr);
926 Else, compute address misalignment in bytes:
927 addr_mis = addr & (vectype_align - 1)
929 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
931 (elem_size = element type size; an element is the scalar element whose type
932 is the inner type of the vectype)
934 The computations will be emitted at the end of BB. We also compute and
935 store upper bound (included) of the result in BOUND.
937 When the step of the data-ref in the loop is not 1 (as in interleaved data
938 and SLP), the number of iterations of the prolog must be divided by the step
939 (which is equal to the size of interleaved group).
941 The above formulas assume that VF == number of elements in the vector. This
942 may not hold when there are multiple-types in the loop.
943 In this case, for some data-references in the loop the VF does not represent
944 the number of elements that fit in the vector. Therefore, instead of VF we
945 use TYPE_VECTOR_SUBPARTS. */
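/* Worked example (editor's addition, values are hypothetical): with a
   16-byte vector of 4-byte ints (VF = nelements = 4, elem_size = 4,
   vectype_align = 16), a start address whose misalignment is 8 bytes,
   and step 1:

       addr_mis      = addr & 15          =  8
       elem_misalign = 8 >> log2(4)       =  2
       prolog_niters = ((4 - 2) & 3) / 1  =  2

   i.e. peeling two scalar iterations advances the access by 8 bytes and
   makes DR 16-byte aligned.  */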
947 static tree
948 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
949 basic_block bb, int *bound)
951 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
952 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
953 tree var;
954 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
955 gimple_seq stmts = NULL, new_stmts = NULL;
956 tree iters, iters_name;
957 gimple *dr_stmt = DR_STMT (dr);
958 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
959 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
960 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
961 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
963 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
965 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_NOTE, vect_location,
969 "known peeling = %d.\n", npeel);
971 iters = build_int_cst (niters_type, npeel);
972 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
974 else
976 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
977 tree offset = negative
978 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
979 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
980 &stmts, offset, loop);
981 tree type = unsigned_type_for (TREE_TYPE (start_addr));
982 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
983 HOST_WIDE_INT elem_size =
984 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
985 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
986 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
987 tree nelements_tree = build_int_cst (type, nelements);
988 tree byte_misalign;
989 tree elem_misalign;
991 /* Create: byte_misalign = addr & (vectype_align - 1) */
992 byte_misalign =
993 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
994 vectype_align_minus_1);
996 /* Create: elem_misalign = byte_misalign / element_size */
997 elem_misalign =
998 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1000 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
1001 if (negative)
1002 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1003 else
1004 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1005 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1006 iters = fold_convert (niters_type, iters);
1007 *bound = nelements - 1;
1010 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "niters for prolog loop: ");
1014 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1015 dump_printf (MSG_NOTE, "\n");
1018 var = create_tmp_var (niters_type, "prolog_loop_niters");
1019 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1021 if (new_stmts)
1022 gimple_seq_add_seq (&stmts, new_stmts);
1023 if (stmts)
1025 gcc_assert (single_succ_p (bb));
1026 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1027 if (gsi_end_p (gsi))
1028 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1029 else
1030 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1032 return iters_name;
1036 /* Function vect_update_init_of_dr
1038 NITERS iterations were peeled from LOOP. DR represents a data reference
1039 in LOOP. This function updates the information recorded in DR to
1040 account for the fact that the first NITERS iterations had already been
1041 executed. Specifically, it updates the OFFSET field of DR. */
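/* Worked example (editor's addition): for a data reference with
   DR_STEP = 4 (bytes per iteration), after peeling niters = 3 prolog
   iterations DR_OFFSET is increased by 3 * 4 = 12 bytes, so the
   remaining loop starts at the element the fourth original iteration
   would have accessed.  */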
1043 static void
1044 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1046 tree offset = DR_OFFSET (dr);
1048 niters = fold_build2 (MULT_EXPR, sizetype,
1049 fold_convert (sizetype, niters),
1050 fold_convert (sizetype, DR_STEP (dr)));
1051 offset = fold_build2 (PLUS_EXPR, sizetype,
1052 fold_convert (sizetype, offset), niters);
1053 DR_OFFSET (dr) = offset;
1057 /* Function vect_update_inits_of_drs
1059 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1060 This function updates the information recorded for the data references in
1061 the loop to account for the fact that the first NITERS iterations had
1062 already been executed. Specifically, it updates the initial_condition of
1063 the access_function of all the data_references in the loop. */
1065 static void
1066 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1068 unsigned int i;
1069 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1070 struct data_reference *dr;
1072 if (dump_enabled_p ())
1073 dump_printf_loc (MSG_NOTE, vect_location,
1074 "=== vect_update_inits_of_dr ===\n");
1076 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1077 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1079 gimple_seq seq;
1080 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1081 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1083 niters = fold_convert (sizetype, niters);
1084 niters = force_gimple_operand (niters, &seq, false, var);
1085 if (seq)
1087 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1088 gcc_assert (!new_bb);
1092 FOR_EACH_VEC_ELT (datarefs, i, dr)
1093 vect_update_init_of_dr (dr, niters);
1097 /* This function builds ni_name = number of iterations. Statements
1098 are emitted on the loop preheader edge. */
1100 tree
1101 vect_build_loop_niters (loop_vec_info loop_vinfo)
1103 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1104 if (TREE_CODE (ni) == INTEGER_CST)
1105 return ni;
1106 else
1108 tree ni_name, var;
1109 gimple_seq stmts = NULL;
1110 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1112 var = create_tmp_var (TREE_TYPE (ni), "niters");
1113 ni_name = force_gimple_operand (ni, &stmts, false, var);
1114 if (stmts)
1115 gsi_insert_seq_on_edge_immediate (pe, stmts);
1117 return ni_name;
1121 /* Calculate the number of iterations above which vectorized loop will be
 1122 preferred to the scalar loop. NITERS_PROLOG is the number of iterations
1123 of prolog loop. If it's integer const, the integer number is also passed
1124 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1125 number of iterations of prolog loop. VFM1 is vector factor minus one.
1126 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1127 (rather than vectorized) loop will be executed. This function stores
1128 upper bound (included) of the result in BOUND_SCALAR. */
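/* Worked example (editor's addition, numbers are hypothetical): with
   VFM1 = 3 (VF = 4) and an unknown prolog count bounded by
   BOUND_PROLOG = 3, the scalar loop runs at most niters_prolog + 3
   iterations.  With CHECK_PROFITABILITY and TH = 10, th is first
   decremented to 9; since 9 >= 3 + 3 the function simply returns the
   constant 9 and sets BOUND_SCALAR to 9.  */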
1130 static tree
1131 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1132 int bound_prolog, int vfm1, int th,
1133 int *bound_scalar, bool check_profitability)
1135 tree type = TREE_TYPE (niters_prolog);
1136 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1137 build_int_cst (type, vfm1));
1139 *bound_scalar = vfm1 + bound_prolog;
1140 if (check_profitability)
1142 /* TH indicates the minimum niters of vectorized loop, while we
1143 compute the maximum niters of scalar loop. */
1144 th--;
1145 /* Peeling for constant times. */
1146 if (int_niters_prolog >= 0)
1148 *bound_scalar = (int_niters_prolog + vfm1 < th
1149 ? th
1150 : vfm1 + int_niters_prolog);
1151 return build_int_cst (type, *bound_scalar);
1153 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
 1154 bound (included) of niters of prolog loop. */
1155 if (th >= vfm1 + bound_prolog)
1157 *bound_scalar = th;
1158 return build_int_cst (type, th);
1160 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1161 else if (th > vfm1)
1162 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1164 return niters;
1167 /* This function generates the following statements:
1169 niters = number of iterations loop executes (after peeling)
1170 niters_vector = niters / vf
1172 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1173 true if NITERS doesn't overflow. */
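/* Worked example (editor's addition): with vf = 4 and niters = 10, the
   no-overflow form gives niters_vector = 10 >> 2 = 2, and the
   overflow-safe form gives ((10 - 4) >> 2) + 1 = 2 as well; the latter
   remains correct even when niters (latch executions + 1) has wrapped
   around to 0, because the ratio is known to be at least one.  */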
1175 void
1176 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1177 tree *niters_vector_ptr, bool niters_no_overflow)
1179 tree ni_minus_gap, var;
1180 tree niters_vector;
1181 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1182 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1183 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1185 /* If epilogue loop is required because of data accesses with gaps, we
1186 subtract one iteration from the total number of iterations here for
1187 correct calculation of RATIO. */
1188 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1190 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1191 niters,
1192 build_one_cst (TREE_TYPE (niters)));
1193 if (!is_gimple_val (ni_minus_gap))
1195 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1196 gimple *stmts = NULL;
1197 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1198 true, var);
1199 gsi_insert_seq_on_edge_immediate (pe, stmts);
1202 else
1203 ni_minus_gap = niters;
1205 /* Create: niters >> log2(vf) */
1206 /* If it's known that niters == number of latch executions + 1 doesn't
1207 overflow, we can generate niters >> log2(vf); otherwise we generate
1208 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1209 will be at least one. */
1210 if (niters_no_overflow)
1211 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1212 ni_minus_gap, log_vf);
1213 else
1214 niters_vector
1215 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1216 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1217 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1218 ni_minus_gap,
1219 build_int_cst
1220 (TREE_TYPE (niters), vf)),
1221 log_vf),
1222 build_int_cst (TREE_TYPE (niters), 1));
1224 if (!is_gimple_val (niters_vector))
1226 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1227 gimple *stmts = NULL;
1228 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1229 gsi_insert_seq_on_edge_immediate (pe, stmts);
1231 *niters_vector_ptr = niters_vector;
1233 return;
1236 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1237 loop specified by LOOP_VINFO after vectorization, compute the number
1238 of iterations before vectorization (niters_vector * vf) and store it
1239 to NITERS_VECTOR_MULT_VF_PTR. */
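/* Worked example (editor's addition): with vf = 4 and niters_vector = 2
   this emits niters_vector_mult_vf = 2 << 2 = 8, the number of scalar
   iterations covered by the vector loop.  */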
1241 static void
1242 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1243 tree niters_vector,
1244 tree *niters_vector_mult_vf_ptr)
1246 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1247 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1248 tree type = TREE_TYPE (niters_vector);
1249 tree log_vf = build_int_cst (type, exact_log2 (vf));
1250 basic_block exit_bb = single_exit (loop)->dest;
1252 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1253 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1254 niters_vector, log_vf);
1255 if (!is_gimple_val (niters_vector_mult_vf))
1257 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1258 gimple_seq stmts = NULL;
1259 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1260 &stmts, true, var);
1261 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1262 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1264 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
 1267 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1268 from SECOND/FIRST and puts it at the original loop's preheader/exit
1269 edge, the two loops are arranged as below:
1271 preheader_a:
1272 first_loop:
1273 header_a:
1274 i_1 = PHI<i_0, i_2>;
1276 i_2 = i_1 + 1;
1277 if (cond_a)
1278 goto latch_a;
1279 else
1280 goto between_bb;
1281 latch_a:
1282 goto header_a;
1284 between_bb:
1285 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1287 second_loop:
1288 header_b:
1289 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1290 or with i_2 if no LCSSA phi is created
1291 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1293 i_4 = i_3 + 1;
1294 if (cond_b)
1295 goto latch_b;
1296 else
1297 goto exit_bb;
1298 latch_b:
1299 goto header_b;
1301 exit_bb:
1303 This function creates loop closed SSA for the first loop; update the
1304 second loop's PHI nodes by replacing argument on incoming edge with the
 1305 result of newly created lcssa PHI nodes. If CREATE_LCSSA_FOR_IV_PHIS
 1306 is false, loop closed SSA PHIs will only be created for non-IV PHIs of
 1307 the first loop.
1309 This function assumes exit bb of the first loop is preheader bb of the
1310 second loop, i.e, between_bb in the example code. With PHIs updated,
1311 the second loop will execute rest iterations of the first. */
1313 static void
1314 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1315 struct loop *first, struct loop *second,
1316 bool create_lcssa_for_iv_phis)
1318 gphi_iterator gsi_update, gsi_orig;
1319 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1321 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1322 edge second_preheader_e = loop_preheader_edge (second);
1323 basic_block between_bb = single_exit (first)->dest;
1325 gcc_assert (between_bb == second_preheader_e->src);
1326 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1327 /* Either the first loop or the second is the loop to be vectorized. */
1328 gcc_assert (loop == first || loop == second);
1330 for (gsi_orig = gsi_start_phis (first->header),
1331 gsi_update = gsi_start_phis (second->header);
1332 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1333 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1335 gphi *orig_phi = gsi_orig.phi ();
1336 gphi *update_phi = gsi_update.phi ();
1338 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1339 /* Generate lcssa PHI node for the first loop. */
1340 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1341 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1343 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1344 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1345 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1346 arg = new_res;
1349 /* Update PHI node in the second loop by replacing arg on the loop's
1350 incoming edge. */
1351 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1355 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1356 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1357 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1358 appear like below:
1360 guard_bb:
1361 if (cond)
1362 goto merge_bb;
1363 else
1364 goto skip_loop;
1366 skip_loop:
1367 header_a:
1368 i_1 = PHI<i_0, i_2>;
1370 i_2 = i_1 + 1;
1371 if (cond_a)
1372 goto latch_a;
1373 else
1374 goto exit_a;
1375 latch_a:
1376 goto header_a;
1378 exit_a:
1379 i_5 = PHI<i_2>;
1381 merge_bb:
1382 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1384 update_loop:
1385 header_b:
1386 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1388 i_4 = i_3 + 1;
1389 if (cond_b)
1390 goto latch_b;
1391 else
1392 goto exit_bb;
1393 latch_b:
1394 goto header_b;
1396 exit_bb:
1398 This function creates PHI nodes at merge_bb and replaces the use of i_5
 1399 in the update_loop's PHI node with the new PHI's result. */
1401 static void
1402 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1403 struct loop *update_loop,
1404 edge guard_edge, edge merge_edge)
1406 source_location merge_loc, guard_loc;
1407 edge orig_e = loop_preheader_edge (skip_loop);
1408 edge update_e = loop_preheader_edge (update_loop);
1409 gphi_iterator gsi_orig, gsi_update;
1411 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1412 gsi_update = gsi_start_phis (update_loop->header));
1413 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1414 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1416 gphi *orig_phi = gsi_orig.phi ();
1417 gphi *update_phi = gsi_update.phi ();
1419 /* Generate new phi node at merge bb of the guard. */
1420 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1421 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1423 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1424 args in NEW_PHI for these edges. */
1425 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1426 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1427 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1428 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1429 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1430 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1432 /* Update phi in UPDATE_PHI. */
1433 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1437 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1438 this function searches for the corresponding lcssa phi node in exit
1439 bb of LOOP. If it is found, return the phi result; otherwise return
1440 NULL. */
1442 static tree
1443 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1444 gphi *lcssa_phi)
1446 gphi_iterator gsi;
1447 edge e = single_exit (loop);
1449 gcc_assert (single_pred_p (e->dest));
1450 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1452 gphi *phi = gsi.phi ();
1453 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1454 PHI_ARG_DEF (lcssa_phi, 0), 0))
1455 return PHI_RESULT (phi);
1457 return NULL_TREE;
1460 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1461 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1462 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1463 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1464 The CFG looks like:
1466 loop:
1467 header_a:
1468 i_1 = PHI<i_0, i_2>;
1470 i_2 = i_1 + 1;
1471 if (cond_a)
1472 goto latch_a;
1473 else
1474 goto exit_a;
1475 latch_a:
1476 goto header_a;
1478 exit_a:
1480 guard_bb:
1481 if (cond)
1482 goto merge_bb;
1483 else
1484 goto epilog_loop;
1486 ;; fall_through_bb
1488 epilog_loop:
1489 header_b:
1490 i_3 = PHI<i_2, i_4>;
1492 i_4 = i_3 + 1;
1493 if (cond_b)
1494 goto latch_b;
1495 else
1496 goto merge_bb;
1497 latch_b:
1498 goto header_b;
1500 merge_bb:
1501 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1503 exit_bb:
1504 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
 1506 For each name used outside EPILOG (i.e. for each name that has an lcssa
1507 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1508 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1509 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1510 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1511 in exit_bb will also be updated. */
1513 static void
1514 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1515 edge guard_edge, edge merge_edge)
1517 gphi_iterator gsi;
1518 basic_block merge_bb = guard_edge->dest;
1520 gcc_assert (single_succ_p (merge_bb));
1521 edge e = single_succ_edge (merge_bb);
1522 basic_block exit_bb = e->dest;
1523 gcc_assert (single_pred_p (exit_bb));
1524 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1526 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1528 gphi *update_phi = gsi.phi ();
1529 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1530 /* This loop-closed-phi actually doesn't represent a use out of the
1531 loop - the phi arg is a constant. */
1532 if (TREE_CODE (old_arg) != SSA_NAME)
1533 continue;
1535 tree merge_arg = get_current_def (old_arg);
1536 if (!merge_arg)
1537 merge_arg = old_arg;
1539 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1540 /* If the var is live after loop but not a reduction, we simply
1541 use the old arg. */
1542 if (!guard_arg)
1543 guard_arg = old_arg;
1545 /* Create new phi node in MERGE_BB: */
1546 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1547 gphi *merge_phi = create_phi_node (new_res, merge_bb);
 1549 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set
1550 the two PHI args in merge_phi for these edges. */
1551 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1552 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1554 /* Update the original phi in exit_bb. */
1555 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1559 /* EPILOG loop is duplicated from the original loop for vectorizing,
1560 the arg of its loop closed ssa PHI needs to be updated. */
1562 static void
1563 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1565 gphi_iterator gsi;
1566 basic_block exit_bb = single_exit (epilog)->dest;
1568 gcc_assert (single_pred_p (exit_bb));
1569 edge e = EDGE_PRED (exit_bb, 0);
1570 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1571 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1574 /* Function vect_do_peeling.
1576 Input:
1577 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1579 preheader:
1580 LOOP:
1581 header_bb:
1582 loop_body
1583 if (exit_loop_cond) goto exit_bb
1584 else goto header_bb
1585 exit_bb:
1587 - NITERS: The number of iterations of the loop.
1588 - NITERSM1: The number of iterations of the loop's latch.
1589 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1590 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1591 CHECK_PROFITABILITY is true.
1592 Output:
1593 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1595 This function peels prolog and epilog from the loop, adds guards skipping
1596 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1597 would look like:
1599 guard_bb_1:
1600 if (prefer_scalar_loop) goto merge_bb_1
1601 else goto guard_bb_2
1603 guard_bb_2:
1604 if (skip_prolog) goto merge_bb_2
1605 else goto prolog_preheader
1607 prolog_preheader:
1608 PROLOG:
1609 prolog_header_bb:
1610 prolog_body
1611 if (exit_prolog_cond) goto prolog_exit_bb
1612 else goto prolog_header_bb
1613 prolog_exit_bb:
1615 merge_bb_2:
1617 vector_preheader:
1618 VECTOR LOOP:
1619 vector_header_bb:
1620 vector_body
1621 if (exit_vector_cond) goto vector_exit_bb
1622 else goto vector_header_bb
1623 vector_exit_bb:
1625 guard_bb_3:
1626 if (skip_epilog) goto merge_bb_3
1627 else goto epilog_preheader
1629 merge_bb_1:
1631 epilog_preheader:
1632 EPILOG:
1633 epilog_header_bb:
1634 epilog_body
1635 if (exit_epilog_cond) goto merge_bb_3
1636 else goto epilog_header_bb
1638 merge_bb_3:
 1640 Note this function peels the prolog and epilog, and adds the guards,
 1641 only when necessary.
1642 Returns created epilogue or NULL.
1644 TODO: Guard for prefer_scalar_loop should be emitted along with
1645 versioning conditions if loop versioning is needed. */
1648 struct loop *
1649 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1650 tree *niters_vector, int th, bool check_profitability,
1651 bool niters_no_overflow)
1653 edge e, guard_e;
1654 tree type = TREE_TYPE (niters), guard_cond;
1655 basic_block guard_bb, guard_to;
1656 int prob_prolog, prob_vector, prob_epilog;
1657 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1658 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1659 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1660 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1661 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1663 if (!prolog_peeling && !epilog_peeling)
1664 return NULL;
1666 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1667 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1668 vf = 3;
1669 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1670 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1672 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1673 struct loop *first_loop = loop;
1674 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1675 create_lcssa_for_virtual_phi (loop);
1676 update_ssa (TODO_update_ssa_only_virtuals);
1678 if (MAY_HAVE_DEBUG_STMTS)
1680 gcc_assert (!adjust_vec.exists ());
1681 adjust_vec.create (32);
1683 initialize_original_copy_tables ();
1685 /* Prolog loop may be skipped. */
1686 bool skip_prolog = (prolog_peeling != 0);
1687 /* Skip to epilog if scalar loop may be preferred. It's only used when
1688 we peel for epilog loop. */
1689 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1690 /* Epilog loop must be executed if the number of iterations for epilog
1691 loop is known at compile time, otherwise we need to add a check at
1692 the end of vector loop and skip to the end of epilog loop. */
1693 bool skip_epilog = (prolog_peeling < 0
1694 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1695 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1696 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1697 skip_epilog = false;
1699 /* Record the anchor bb at which guard should be placed if scalar loop
1700 may be preferred. */
1701 basic_block anchor = loop_preheader_edge (loop)->src;
1702 if (skip_vector)
1704 split_edge (loop_preheader_edge (loop));
1706 /* Due to the order in which we peel prolog and epilog, we first
1707 propagate probability to the whole loop. The purpose is to
1708 avoid adjusting probabilities of both prolog and vector loops
1709 separately. Note in this case, the probability of epilog loop
1710 needs to be scaled back later. */
1711 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1712 scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
1713 REG_BR_PROB_BASE);
1714 scale_loop_profile (loop, prob_vector, bound);
1717 tree niters_prolog = build_int_cst (type, 0);
1718 source_location loop_loc = find_loop_location (loop);
1719 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1720 if (prolog_peeling)
1722 e = loop_preheader_edge (loop);
1723 if (!slpeel_can_duplicate_loop_p (loop, e))
1725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1726 "loop can't be duplicated to preheader edge.\n");
1727 gcc_unreachable ();
1729 /* Peel prolog and put it on preheader edge of loop. */
1730 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1731 if (!prolog)
1733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1734 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1735 gcc_unreachable ();
1737 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1738 first_loop = prolog;
1739 reset_original_copy_tables ();
1741 /* Generate and update the number of iterations for prolog loop. */
1742 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1743 &bound_prolog);
1744 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1746 /* Skip the prolog loop. */
1747 if (skip_prolog)
1749 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1750 niters_prolog, build_int_cst (type, 0));
1751 guard_bb = loop_preheader_edge (prolog)->src;
1752 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1753 guard_to = split_edge (loop_preheader_edge (loop));
1754 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1755 guard_to, guard_bb,
1756 inverse_probability (prob_prolog),
1757 irred_flag);
1758 e = EDGE_PRED (guard_to, 0);
1759 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1760 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1762 scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
1763 REG_BR_PROB_BASE);
1764 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1766 /* Update init address of DRs. */
1767 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1768 /* Update niters for vector loop. */
1769 LOOP_VINFO_NITERS (loop_vinfo)
1770 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1771 LOOP_VINFO_NITERSM1 (loop_vinfo)
1772 = fold_build2 (MINUS_EXPR, type,
1773 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1774 niters = vect_build_loop_niters (loop_vinfo);
1776 /* The prolog iterates at most bound_prolog times; its latch iterates at
1777 most bound_prolog - 1 times. */
1778 record_niter_bound (prolog, bound_prolog - 1, false, true);
1779 delete_update_ssa ();
1780 adjust_vec_debug_stmts ();
1781 scev_reset ();
1784 if (epilog_peeling)
1786 e = single_exit (loop);
1787 if (!slpeel_can_duplicate_loop_p (loop, e))
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1790 "loop can't be duplicated to exit edge.\n");
1791 gcc_unreachable ();
1793 /* Peel epilog and put it on exit edge of loop. */
1794 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1795 if (!epilog)
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1798 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1799 gcc_unreachable ();
1801 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1803 /* The scalar version of the loop may be preferred. In this case, add a
1804 guard and skip to the epilog. Note this only happens when the number
1805 of iterations of the loop is unknown at compile time; otherwise it
1806 won't be vectorized. */
1807 if (skip_vector)
1809 /* An additional epilogue iteration is peeled if a gap exists. */
1810 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1811 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1812 bound_prolog,
1813 peel_for_gaps ? vf : vf - 1,
1814 th, &bound_scalar,
1815 check_profitability);
1816 /* Build guard against NITERSM1 since NITERS may overflow. */
1817 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1818 guard_bb = anchor;
1819 guard_to = split_edge (loop_preheader_edge (epilog));
1820 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1821 guard_to, guard_bb,
1822 inverse_probability (prob_vector),
1823 irred_flag);
1824 e = EDGE_PRED (guard_to, 0);
1825 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1826 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1828 /* Simply propagate profile info from guard_bb to guard_to which is
1829 a merge point of control flow. */
1830 guard_to->frequency = guard_bb->frequency;
1831 guard_to->count = guard_bb->count;
1832 single_succ_edge (guard_to)->count = guard_to->count;
1833 /* Scale probability of epilog loop back. */
1834 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
1835 scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
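/* E.g., with prob_vector == 9/10 * REG_BR_PROB_BASE this multiplies the
   epilog frequencies by roughly 10/9, undoing the earlier scaling of the
   whole loop by prob_vector.  */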
1838 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1839 tree niters_vector_mult_vf;
1840 /* If the loop is peeled a non-zero constant number of times, NITERS now
1841 refers to orig_niters - prolog_peeling; it won't overflow even if
1842 orig_niters overflows. */
1843 niters_no_overflow |= (prolog_peeling > 0);
1844 vect_gen_vector_loop_niters (loop_vinfo, niters,
1845 niters_vector, niters_no_overflow);
1846 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1847 &niters_vector_mult_vf);
1848 /* Update IVs of original loop as if they were advanced by
1849 niters_vector_mult_vf steps. */
1850 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1851 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1852 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1853 update_e);
1855 if (skip_epilog)
1857 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1858 niters, niters_vector_mult_vf);
1859 guard_bb = single_exit (loop)->dest;
1860 guard_to = split_edge (single_exit (epilog));
1861 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1862 skip_vector ? anchor : guard_bb,
1863 inverse_probability (prob_epilog),
1864 irred_flag);
1865 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1866 single_exit (epilog));
1867 /* Only need to handle basic block before epilog loop if it's not
1868 the guard_bb, which is the case when skip_vector is true. */
1869 if (guard_bb != bb_before_epilog)
1871 prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
1872 + inverse_probability (prob_vector));
1874 scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
1875 REG_BR_PROB_BASE);
1877 scale_loop_profile (epilog, prob_epilog, bound);
1879 else
1880 slpeel_update_phi_nodes_for_lcssa (epilog);
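/* Bound on the epilog latch: without PEELING_FOR_GAPS the epilog handles
   at most vf - 1 scalar iterations, so its latch runs at most vf - 2
   times; with PEELING_FOR_GAPS one more vector iteration's worth is left
   to the epilog, raising the latch bound to vf - 1.  */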
1882 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1883 /* We share epilog loop with scalar version loop. */
1884 bound = MAX (bound, bound_scalar - 1);
1885 record_niter_bound (epilog, bound, false, true);
1887 delete_update_ssa ();
1888 adjust_vec_debug_stmts ();
1889 scev_reset ();
1891 adjust_vec.release ();
1892 free_original_copy_tables ();
1894 return epilog;
1897 /* Function vect_create_cond_for_niters_checks.
1899 Create a conditional expression that represents the run-time checks for
1900 the loop's niter. The loop is guaranteed to terminate if the run-time
1901 checks hold.
1903 Input:
1904 COND_EXPR - input conditional expression. New conditions will be chained
1905 with logical AND operation. If it is NULL, it is simply
1906 set to the niter assumptions.
1907 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
1908 assumptions to be checked at run time.
1910 Output:
1911 COND_EXPR - conditional expression.
1913 The returned COND_EXPR is the conditional expression to be used in the
1914 if statement that controls which version of the loop gets executed at
1915 runtime. */
1917 static void
1918 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1920 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1922 if (*cond_expr)
1923 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1924 *cond_expr, part_cond_expr);
1925 else
1926 *cond_expr = part_cond_expr;
1929 /* Function vect_create_cond_for_align_checks.
1931 Create a conditional expression that represents the alignment checks for
1932 all of the data references (array element references) whose alignment must be
1933 checked at runtime.
1935 Input:
1936 COND_EXPR - input conditional expression. New conditions will be chained
1937 with logical AND operation.
1938 LOOP_VINFO - two fields of the loop information are used.
1939 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1940 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1942 Output:
1943 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1944 expression.
1945 The returned value is the conditional expression to be used in the if
1946 statement that controls which version of the loop gets executed at runtime.
1948 The algorithm makes two assumptions:
1949 1) The number of bytes "n" in a vector is a power of 2.
1950 2) An address "a" is aligned if a%n is zero, and this
1951 test can be done as a&(n-1) == 0. For example, for 16
1952 byte vectors the test is a&0xf == 0. */
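/* Schematically, for two data references and 16-byte vectors the emitted
   condition is roughly
     ((addr2int0 | addr2int1) & 0xf) == 0
   where addr2int<i> is the address of the first vector of the i'th data
   reference converted to a pointer-sized integer (the temporaries built
   below).  */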
1954 static void
1955 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1956 tree *cond_expr,
1957 gimple_seq *cond_expr_stmt_list)
1959 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1960 vec<gimple *> may_misalign_stmts
1961 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1962 gimple *ref_stmt;
1963 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1964 tree mask_cst;
1965 unsigned int i;
1966 tree int_ptrsize_type;
1967 char tmp_name[20];
1968 tree or_tmp_name = NULL_TREE;
1969 tree and_tmp_name;
1970 gimple *and_stmt;
1971 tree ptrsize_zero;
1972 tree part_cond_expr;
1974 /* Check that mask is one less than a power of 2, i.e., mask is
1975 all zeros followed by all ones. */
1976 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1978 int_ptrsize_type = signed_type_for (ptr_type_node);
1980 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1981 of the first vector of the i'th data reference. */
1983 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1985 gimple_seq new_stmt_list = NULL;
1986 tree addr_base;
1987 tree addr_tmp_name;
1988 tree new_or_tmp_name;
1989 gimple *addr_stmt, *or_stmt;
1990 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1991 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1992 bool negative = tree_int_cst_compare
1993 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1994 tree offset = negative
1995 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1997 /* create: addr_tmp = (int)(address_of_first_vector) */
1998 addr_base =
1999 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2000 offset, loop);
2001 if (new_stmt_list != NULL)
2002 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2004 sprintf (tmp_name, "addr2int%d", i);
2005 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2006 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2007 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2009 /* The addresses are ORed together. */
2011 if (or_tmp_name != NULL_TREE)
2013 /* create: or_tmp = or_tmp | addr_tmp */
2014 sprintf (tmp_name, "orptrs%d", i);
2015 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2016 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2017 or_tmp_name, addr_tmp_name);
2018 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2019 or_tmp_name = new_or_tmp_name;
2021 else
2022 or_tmp_name = addr_tmp_name;
2024 } /* end for i */
2026 mask_cst = build_int_cst (int_ptrsize_type, mask);
2028 /* create: and_tmp = or_tmp & mask */
2029 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2031 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2032 or_tmp_name, mask_cst);
2033 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2035 /* Make and_tmp the left operand of the conditional test against zero.
2036 If and_tmp has a nonzero bit then some address is unaligned. */
2037 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2038 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2039 and_tmp_name, ptrsize_zero);
2040 if (*cond_expr)
2041 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2042 *cond_expr, part_cond_expr);
2043 else
2044 *cond_expr = part_cond_expr;
2047 /* Given LOOP's two data references and segment lengths described by DR_A
2048 and DR_B, create an expression checking if the two address ranges intersect
2049 with each other based on the index of the two addresses. This can only be
2050 done if DR_A and DR_B refer to the same (array) object and the index
2051 is the only difference. For example:
2053                 DR_A                  DR_B
2054    data-ref     arr[i]                arr[j]
2055    base_object  arr                   arr
2056    index        {i_0, +, 1}_loop      {j_0, +, 1}_loop
2058 The addresses and their index are like:
2060   |<- ADDR_A ->|          |<- ADDR_B ->|
2061 ------------------------------------------------------->
2062   |   |   |   |   |       |   |   |   |   |
2063 ------------------------------------------------------->
2064   i_0 ...   i_0+4         j_0 ...   j_0+4
2066 We can create an expression based on the index rather than the address:
2068 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2070 Note the evolution step of the index needs to be considered in the comparison. */
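/* Concretely, for the example above, if each segment covers 4 iterations
   and the index step is 1, the check actually emitted below is
     (i_0 + 4 <= j_0 || j_0 + 4 <= i_0)
   built from the per-dimension [min, max) index ranges computed in the
   function body.  */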
2072 static bool
2073 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
2074 const dr_with_seg_len& dr_a,
2075 const dr_with_seg_len& dr_b)
2077 if (integer_zerop (DR_STEP (dr_a.dr))
2078 || integer_zerop (DR_STEP (dr_b.dr))
2079 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2080 return false;
2082 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2083 return false;
2085 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2086 return false;
2088 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2089 return false;
2091 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2092 return false;
2094 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2096 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2097 unsigned HOST_WIDE_INT abs_step
2098 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2100 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2101 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2102 /* Infer the number of iterations with which the memory segment is accessed
2103 by DR. In other words, aliasing is checked if the memory segment accessed
2104 by DR_A in some iterations intersects with the memory segment accessed by
2105 DR_B in the same number of iterations.
2106 Note the segment length is a linear function of the number of iterations
2107 with DR_STEP as the coefficient. */
2108 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2109 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
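/* This is a ceiling division: e.g., a 16-byte segment with a 4-byte step
   is accessed over 4 iterations, and a 17-byte segment over 5.  */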
2111 unsigned int i;
2112 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2114 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2115 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2116 /* Two indices must be the same if they are not scev, or not scev w.r.t.
2117 the current loop being vectorized. */
2118 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2119 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2120 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2121 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2123 if (operand_equal_p (access1, access2, 0))
2124 continue;
2126 return false;
2128 /* The two indices must have the same step. */
2129 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2130 return false;
2132 tree idx_step = CHREC_RIGHT (access1);
2133 /* Index must have const step, otherwise DR_STEP won't be constant. */
2134 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2135 /* Index must evaluate in the same direction as DR. */
2136 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2138 tree min1 = CHREC_LEFT (access1);
2139 tree min2 = CHREC_LEFT (access2);
2140 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2141 return false;
2143 /* Ideally, aliasing can be checked against the loop's control IV, but we
2144 need to prove a linear mapping between the control IV and the reference
2145 index. Although that should be true, we check against the (array)
2146 index of the data reference. Like the segment length, the index length
2147 is a linear function of the number of iterations with idx_step as
2148 the coefficient, i.e., niter_len * idx_step. */
2149 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2150 build_int_cst (TREE_TYPE (min1),
2151 niter_len1));
2152 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2153 build_int_cst (TREE_TYPE (min2),
2154 niter_len2));
2155 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2156 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2157 /* Adjust ranges for negative step. */
2158 if (neg_step)
2160 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2161 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2162 CHREC_LEFT (access1), idx_step);
2163 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2164 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2165 CHREC_LEFT (access2), idx_step);
2167 tree part_cond_expr
2168 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2169 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2170 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2171 if (*cond_expr)
2172 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2173 *cond_expr, part_cond_expr);
2174 else
2175 *cond_expr = part_cond_expr;
2177 return true;
2180 /* Given two data references and segment lengths described by DR_A and DR_B,
2181 create an expression checking if the two address ranges intersect with
2182 each other:
2184 ((DR_A_addr_0 + DR_A_segment_length_0 <= DR_B_addr_0)
2185 || (DR_B_addr_0 + DR_B_segment_length_0 <= DR_A_addr_0)) */
2187 static void
2188 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
2189 const dr_with_seg_len& dr_a,
2190 const dr_with_seg_len& dr_b)
2192 *cond_expr = NULL_TREE;
2193 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
2194 return;
2196 tree segment_length_a = dr_a.seg_len;
2197 tree segment_length_b = dr_b.seg_len;
2198 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2199 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2200 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2202 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2203 offset_a, DR_INIT (dr_a.dr));
2204 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2205 offset_b, DR_INIT (dr_b.dr));
2206 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2207 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2209 tree seg_a_min = addr_base_a;
2210 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2211 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2212 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2213 [a, a+12) */
2214 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2216 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2217 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2218 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2221 tree seg_b_min = addr_base_b;
2222 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2223 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2225 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2226 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2227 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2229 *cond_expr
2230 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2231 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2232 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2235 /* Function vect_create_cond_for_alias_checks.
2237 Create a conditional expression that represents the run-time checks for
2238 overlapping of address ranges represented by a list of data reference
2239 relations passed as input.
2241 Input:
2242 COND_EXPR - input conditional expression. New conditions will be chained
2243 with logical AND operation. If it is NULL, it is simply
2244 set to the generated alias checks.
2245 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddrs
2246 to be checked.
2248 Output:
2249 COND_EXPR - conditional expression.
2251 The returned COND_EXPR is the conditional expression to be used in the if
2252 statement that controls which version of the loop gets executed at runtime. */
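/* Schematically, for two DDR pairs (A,B) and (C,D) the resulting COND_EXPR
   is roughly
     (A_max <= B_min || B_max <= A_min)
     && (C_max <= D_min || D_max <= C_min)
   where each disjunct is built by create_intersect_range_checks above.  */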
2255 void
2256 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2258 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2259 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2260 tree part_cond_expr;
2262 if (comp_alias_ddrs.is_empty ())
2263 return;
2265 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2266 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2268 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2269 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2271 if (dump_enabled_p ())
2273 dump_printf_loc (MSG_NOTE, vect_location,
2274 "create runtime check for data references ");
2275 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2276 dump_printf (MSG_NOTE, " and ");
2277 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2278 dump_printf (MSG_NOTE, "\n");
2281 /* Create the condition expression for each pair of data references. */
2282 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
2283 if (*cond_expr)
2284 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2285 *cond_expr, part_cond_expr);
2286 else
2287 *cond_expr = part_cond_expr;
2290 if (dump_enabled_p ())
2291 dump_printf_loc (MSG_NOTE, vect_location,
2292 "created %u versioning for alias checks.\n",
2293 comp_alias_ddrs.length ());
2297 /* Function vect_loop_versioning.
2299 If the loop has data references that may or may not be aligned or/and
2300 has data reference relations whose independence was not proven then
2301 two versions of the loop need to be generated, one which is vectorized
2302 and one which isn't. A test is then generated to control which of the
2303 loops is executed. The test checks for the alignment of all of the
2304 data references that may or may not be aligned. An additional
2305 sequence of runtime tests is generated for each pair of DDRs whose
2306 independence was not proven. The vectorized version of the loop is
2307 executed only if both the alias and alignment tests pass.
2309 The test generated to check which version of the loop is executed
2310 is modified to also check for profitability as indicated by the
2311 cost model threshold TH.
2313 The versioning precondition(s) are placed in *COND_EXPR and
2314 *COND_EXPR_STMT_LIST. */
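/* Schematically, the generated guard is roughly
     if (scalar_loop_iters > th
         && niter_assumptions && alignment_ok && no_alias)
       vectorized loop;
     else
       scalar loop;
   where each conjunct is emitted only when the corresponding kind of
   versioning is required (see the checks below); the names alignment_ok
   and no_alias are illustrative placeholders for the conditions built by
   the helpers above.  */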
2316 void
2317 vect_loop_versioning (loop_vec_info loop_vinfo,
2318 unsigned int th, bool check_profitability)
2320 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2321 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2322 basic_block condition_bb;
2323 gphi_iterator gsi;
2324 gimple_stmt_iterator cond_exp_gsi;
2325 basic_block merge_bb;
2326 basic_block new_exit_bb;
2327 edge new_exit_e, e;
2328 gphi *orig_phi, *new_phi;
2329 tree cond_expr = NULL_TREE;
2330 gimple_seq cond_expr_stmt_list = NULL;
2331 tree arg;
2332 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2333 gimple_seq gimplify_stmt_list = NULL;
2334 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2335 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2336 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2337 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2339 if (check_profitability)
2340 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2341 build_int_cst (TREE_TYPE (scalar_loop_iters),
2342 th));
2344 if (version_niter)
2345 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2347 if (cond_expr)
2348 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2349 is_gimple_condexpr, NULL_TREE);
2351 if (version_align)
2352 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2353 &cond_expr_stmt_list);
2355 if (version_alias)
2356 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2358 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2359 is_gimple_condexpr, NULL_TREE);
2360 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2362 initialize_original_copy_tables ();
2363 if (scalar_loop)
2365 edge scalar_e;
2366 basic_block preheader, scalar_preheader;
2368 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2369 scale LOOP's frequencies instead. */
2370 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2371 prob, REG_BR_PROB_BASE - prob,
2372 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2373 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2374 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2375 while we need to move it above LOOP's preheader. */
2376 e = loop_preheader_edge (loop);
2377 scalar_e = loop_preheader_edge (scalar_loop);
2378 gcc_assert (empty_block_p (e->src)
2379 && single_pred_p (e->src));
2380 gcc_assert (empty_block_p (scalar_e->src)
2381 && single_pred_p (scalar_e->src));
2382 gcc_assert (single_pred_p (condition_bb));
2383 preheader = e->src;
2384 scalar_preheader = scalar_e->src;
2385 scalar_e = find_edge (condition_bb, scalar_preheader);
2386 e = single_pred_edge (preheader);
2387 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2388 scalar_preheader);
2389 redirect_edge_and_branch_force (scalar_e, preheader);
2390 redirect_edge_and_branch_force (e, condition_bb);
2391 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2392 single_pred (condition_bb));
2393 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2394 single_pred (scalar_preheader));
2395 set_immediate_dominator (CDI_DOMINATORS, preheader,
2396 condition_bb);
2398 else
2399 nloop = loop_version (loop, cond_expr, &condition_bb,
2400 prob, REG_BR_PROB_BASE - prob,
2401 prob, REG_BR_PROB_BASE - prob, true);
2403 if (version_niter)
2405 /* The versioned loop could be infinite; we need to clear the existing
2406 niter information, which was copied from the original loop. */
2407 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2408 vect_free_loop_info_assumptions (nloop);
2409 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2410 loop_constraint_set (loop, LOOP_C_INFINITE);
2413 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2414 && dump_enabled_p ())
2416 if (version_alias)
2417 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2418 "loop versioned for vectorization because of "
2419 "possible aliasing\n");
2420 if (version_align)
2421 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2422 "loop versioned for vectorization to enhance "
2423 "alignment\n");
2426 free_original_copy_tables ();
2428 /* Loop versioning violates an assumption we try to maintain during
2429 vectorization - that the loop exit block has a single predecessor.
2430 After versioning, the exit block of both loop versions is the same
2431 basic block (i.e. it has two predecessors). Just in order to simplify
2432 following transformations in the vectorizer, we fix this situation
2433 here by adding a new (empty) block on the exit-edge of the loop,
2434 with the proper loop-exit phis to maintain loop-closed-form.
2435 If loop versioning wasn't done from loop, but from scalar_loop instead,
2436 merge_bb will already have just a single predecessor. */
2438 merge_bb = single_exit (loop)->dest;
2439 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2441 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2442 new_exit_bb = split_edge (single_exit (loop));
2443 new_exit_e = single_exit (loop);
2444 e = EDGE_SUCC (new_exit_bb, 0);
2446 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2448 tree new_res;
2449 orig_phi = gsi.phi ();
2450 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2451 new_phi = create_phi_node (new_res, new_exit_bb);
2452 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2453 add_phi_arg (new_phi, arg, new_exit_e,
2454 gimple_phi_arg_location_from_edge (orig_phi, e));
2455 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2459 /* End loop-exit-fixes after versioning. */
2461 if (cond_expr_stmt_list)
2463 cond_exp_gsi = gsi_last_bb (condition_bb);
2464 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2465 GSI_SAME_STMT);
2467 update_ssa (TODO_update_ssa);