gcc/tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src))
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
111 if (outer_loop->inner->next)
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
120 else
121 continue;
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 struct adjust_info
133 tree from, to;
134 basic_block bb;
137 /* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
143 static vec<adjust_info, va_heap> adjust_vec;
145 /* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
156 gimple *stmt;
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
165 use_operand_p use_p;
166 basic_block bbuse;
168 if (!is_gimple_debug (stmt))
169 continue;
171 gcc_assert (gimple_debug_bind_p (stmt));
173 bbuse = gimple_bb (stmt);
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
192 /* Adjust debug stmts as scheduled before. */
194 static void
195 adjust_vec_debug_stmts (void)
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
200 gcc_assert (adjust_vec.exists ());
202 while (!adjust_vec.is_empty ())
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
209 /* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
217 adjust_info ai;
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
222 && ! virtual_operand_p (from))
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
230 else
231 adjust_debug_stmts_now (&ai);
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one on each iteration, and has NITERS as its limit.
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
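/* Illustrative example (not part of the original sources): if the loop
   exits on the true edge of its condition, the rewritten exit test is
   "if (indx_after_incr >= NITERS) goto exit", where indx_after_incr is
   the new IV after its increment.  Starting from zero and stepping by
   one, the loop body therefore executes exactly NITERS times.  */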
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
260 tree indx_before_incr, indx_after_incr;
261 gcond *cond_stmt;
262 gcond *orig_cond;
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
269 source_location loop_loc;
270 enum tree_code code;
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
294 free_stmt_vec_info (orig_cond);
296 loop_loc = find_loop_location (loop);
297 if (dump_enabled_p ())
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For all PHI arguments in FROM->dest and TO->dest coming from those
312 edges, ensure that each TO->dest PHI argument has its current_def
313 set to that of the corresponding FROM argument. */
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
318 gimple_stmt_iterator gsi_from, gsi_to;
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
330 gsi_next (&gsi_from);
331 continue;
333 if (virtual_operand_p (to_arg))
335 gsi_next (&gsi_to);
336 continue;
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
357 /* Given LOOP this function generates a new copy of it and puts it
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
367 struct loop *new_loop;
368 basic_block *new_bbs, *bbs, *pbbs;
369 bool at_exit;
370 bool was_imm_dom;
371 basic_block exit_dest;
372 edge exit, new_exit;
373 bool duplicate_outer_loop = false;
375 exit = single_exit (loop);
376 at_exit = (e == exit);
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
389 /* Check whether duplication is possible. */
390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
392 free (bbs);
393 return NULL;
396 /* Generate new loop structure. */
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
400 exit_dest = exit->dest;
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
403 true : false);
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409 edge entry_e = single_pred_edge (preheader);
410 bbs[0] = preheader;
411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 &exit, 1, &new_exit, NULL,
416 at_exit ? loop->latch : e->src, true);
417 exit = single_exit (loop);
418 basic_block new_preheader = new_bbs[0];
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
422 if (scalar_loop != loop)
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
433 EDGE_SUCC (loop->latch, 0));
436 if (at_exit) /* Add the loop copy at exit. */
438 if (scalar_loop != loop)
440 gphi_iterator gsi;
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
446 gphi *phi = gsi.phi ();
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457 if (was_imm_dom || duplicate_outer_loop)
458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
464 delete_basic_block (preheader);
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
468 else /* Add the copy at entry. */
470 if (scalar_loop != loop)
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
502 if (scalar_loop != loop)
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
506 gphi_iterator gsi_orig, gsi_new;
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
525 free (new_bbs);
526 free (bbs);
528 checking_verify_dominators (CDI_DOMINATORS);
530 return new_loop;
534 /* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
537 skip the loop. PROBABILITY is the skip edge's probability. */
539 static edge
540 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
541 basic_block guard_to, basic_block dom_bb,
542 int probability)
544 gimple_stmt_iterator gsi;
545 edge new_e, enter_e;
546 gcond *cond_stmt;
547 gimple_seq gimplify_stmt_list = NULL;
549 enter_e = EDGE_SUCC (guard_bb, 0);
550 enter_e->flags &= ~EDGE_FALLTHRU;
551 enter_e->flags |= EDGE_FALSE_VALUE;
552 gsi = gsi_last_bb (guard_bb);
554 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
555 NULL_TREE);
556 if (gimplify_stmt_list)
557 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
559 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
560 gsi = gsi_last_bb (guard_bb);
561 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
563 /* Add new edge to connect guard block to the merge/loop-exit block. */
564 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
566 new_e->count = guard_bb->count;
567 new_e->probability = probability;
568 new_e->count = apply_probability (enter_e->count, probability);
569 enter_e->count -= new_e->count;
570 enter_e->probability = inverse_probability (probability);
571 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
573 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
574 if (enter_e->dest->loop_father->header == enter_e->dest)
575 split_edge (enter_e);
577 return new_e;
581 /* This function verifies that the following restrictions apply to LOOP:
582 (1) it consists of exactly 2 basic blocks (header and an empty latch) for
583 an innermost loop, or 5 basic blocks for an outer loop.
584 (2) it is single entry, single exit
585 (3) its exit condition is the last stmt in the header
586 (4) E is the entry/exit edge of LOOP. */
589 bool
590 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
592 edge exit_e = single_exit (loop);
593 edge entry_e = loop_preheader_edge (loop);
594 gcond *orig_cond = get_loop_exit_condition (loop);
595 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
596 unsigned int num_bb = loop->inner? 5 : 2;
598 /* All loops have an outer scope; the only case in which loop_outer (loop)
599 is NULL is for the function itself. */
600 if (!loop_outer (loop)
601 || loop->num_nodes != num_bb
602 || !empty_block_p (loop->latch)
603 || !single_exit (loop)
604 /* Verify that new loop exit condition can be trivially modified. */
605 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
606 || (e != exit_e && e != entry_e))
607 return false;
609 return true;
612 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
613 in the exit bb and rename all the uses after the loop. This simplifies
614 the *guard[12] routines, which assume loop closed SSA form for all PHIs
615 (but normally loop closed SSA form doesn't require virtual PHIs to be
616 in the same form). Doing this early simplifies checking which
617 uses should be renamed. */
619 static void
620 create_lcssa_for_virtual_phi (struct loop *loop)
622 gphi_iterator gsi;
623 edge exit_e = single_exit (loop);
625 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
626 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
628 gphi *phi = gsi.phi ();
629 for (gsi = gsi_start_phis (exit_e->dest);
630 !gsi_end_p (gsi); gsi_next (&gsi))
631 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
632 break;
633 if (gsi_end_p (gsi))
635 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
636 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
637 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
638 imm_use_iterator imm_iter;
639 gimple *stmt;
640 use_operand_p use_p;
642 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
643 gimple_phi_set_result (new_phi, new_vop);
644 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
645 if (stmt != new_phi
646 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
647 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
648 SET_USE (use_p, new_vop);
650 break;
655 /* Function find_loop_location.
657 Extract the location of the loop in the source code.
658 If the loop is not well formed for vectorization, an estimated
659 location is calculated.
660 Return the loop location on success and UNKNOWN_LOCATION otherwise. */
662 source_location
663 find_loop_location (struct loop *loop)
665 gimple *stmt = NULL;
666 basic_block bb;
667 gimple_stmt_iterator si;
669 if (!loop)
670 return UNKNOWN_LOCATION;
672 stmt = get_loop_exit_condition (loop);
674 if (stmt
675 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
676 return gimple_location (stmt);
678 /* If we got here the loop is probably not "well formed",
679 try to estimate the loop location */
681 if (!loop->header)
682 return UNKNOWN_LOCATION;
684 bb = loop->header;
686 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
688 stmt = gsi_stmt (si);
689 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
690 return gimple_location (stmt);
693 return UNKNOWN_LOCATION;
696 /* Return true if PHI defines an IV of the loop to be vectorized. */
698 static bool
699 iv_phi_p (gphi *phi)
701 if (virtual_operand_p (PHI_RESULT (phi)))
702 return false;
704 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
705 gcc_assert (stmt_info != NULL);
706 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
707 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
708 return false;
710 return true;
713 /* Function vect_can_advance_ivs_p
715 In case the number of iterations that LOOP iterates is unknown at compile
716 time, an epilog loop will be generated, and the loop induction variables
717 (IVs) will be "advanced" to the value they are supposed to take just before
718 the epilog loop. Here we check that the access function of the loop IVs
719 and the expression that represents the loop bound are simple enough.
720 These restrictions will be relaxed in the future. */
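/* Illustrative example (hypothetical IVs): an IV with evolution
   {base, +, 4} has evolution part 4, which is loop-invariant and accepted;
   an IV whose evolution part is itself a chrec, e.g. {1, +, 1}, describes a
   polynomial of degree 2 and is rejected below.  */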
722 bool
723 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
725 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
726 basic_block bb = loop->header;
727 gphi_iterator gsi;
729 /* Analyze phi functions of the loop header. */
731 if (dump_enabled_p ())
732 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
733 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
735 tree evolution_part;
737 gphi *phi = gsi.phi ();
738 if (dump_enabled_p ())
740 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
741 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
744 /* Skip virtual phi's. The data dependences that are associated with
745 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
747 Skip reduction phis. */
748 if (!iv_phi_p (phi))
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_NOTE, vect_location,
752 "reduc or virtual phi. skip.\n");
753 continue;
756 /* Analyze the evolution function. */
758 evolution_part
759 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
760 if (evolution_part == NULL_TREE)
762 if (dump_enabled_p ())
763 dump_printf (MSG_MISSED_OPTIMIZATION,
764 "No access function or evolution.\n");
765 return false;
768 /* FORNOW: We do not transform initial conditions of IVs
769 which evolution functions are not invariants in the loop. */
771 if (!expr_invariant_in_loop_p (loop, evolution_part))
773 if (dump_enabled_p ())
774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
775 "evolution not invariant in loop.\n");
776 return false;
779 /* FORNOW: We do not transform initial conditions of IVs
780 which evolution functions are a polynomial of degree >= 2. */
782 if (tree_is_chrec (evolution_part))
784 if (dump_enabled_p ())
785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
786 "evolution is chrec.\n");
787 return false;
791 return true;
795 /* Function vect_update_ivs_after_vectorizer.
797 "Advance" the induction variables of LOOP to the value they should take
798 after the execution of LOOP. This is currently necessary because the
799 vectorizer does not handle induction variables that are used after the
800 loop. Such a situation occurs when the last iterations of LOOP are
801 peeled, because:
802 1. We introduced new uses after LOOP for IVs that were not originally used
803 after LOOP: the IVs of LOOP are now used by an epilog loop.
804 2. LOOP is going to be vectorized; this means that it will iterate N/VF
805 times, whereas the loop IVs should be bumped N times.
807 Input:
808 - LOOP - a loop that is going to be vectorized. The last few iterations
809 of LOOP were peeled.
810 - NITERS - the number of iterations that LOOP executes (before it is
811 vectorized). i.e, the number of times the ivs should be bumped.
812 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
813 coming out from LOOP on which there are uses of the LOOP ivs
814 (this is the path from LOOP->exit to epilog_loop->preheader).
816 The new definitions of the ivs are placed in LOOP->exit.
817 The phi args associated with the edge UPDATE_E in the bb
818 UPDATE_E->dest are updated accordingly.
820 Assumption 1: Like the rest of the vectorizer, this function assumes
821 a single loop exit that has a single predecessor.
823 Assumption 2: The phi nodes in the LOOP header and in update_bb are
824 organized in the same order.
826 Assumption 3: The access function of the ivs is simple enough (see
827 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
829 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
830 coming out of LOOP on which the ivs of LOOP are used (this is the path
831 that leads to the epilog loop; other paths skip the epilog loop). This
832 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
833 needs to have its phis updated.  */
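/* Illustrative example (hypothetical values): for an IV described by
   {init, +, step}, the value expected at the start of the epilog is
   init + NITERS * step; e.g. with init 0, step 4 and NITERS 10 the
   epilog starts the IV at 40.  */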
836 static void
837 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
838 tree niters, edge update_e)
840 gphi_iterator gsi, gsi1;
841 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
842 basic_block update_bb = update_e->dest;
843 basic_block exit_bb = single_exit (loop)->dest;
845 /* Make sure there exists a single-predecessor exit bb: */
846 gcc_assert (single_pred_p (exit_bb));
847 gcc_assert (single_succ_edge (exit_bb) == update_e);
849 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
850 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
851 gsi_next (&gsi), gsi_next (&gsi1))
853 tree init_expr;
854 tree step_expr, off;
855 tree type;
856 tree var, ni, ni_name;
857 gimple_stmt_iterator last_gsi;
859 gphi *phi = gsi.phi ();
860 gphi *phi1 = gsi1.phi ();
861 if (dump_enabled_p ())
863 dump_printf_loc (MSG_NOTE, vect_location,
864 "vect_update_ivs_after_vectorizer: phi: ");
865 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
868 /* Skip reduction and virtual phis. */
869 if (!iv_phi_p (phi))
871 if (dump_enabled_p ())
872 dump_printf_loc (MSG_NOTE, vect_location,
873 "reduc or virtual phi. skip.\n");
874 continue;
877 type = TREE_TYPE (gimple_phi_result (phi));
878 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
879 step_expr = unshare_expr (step_expr);
881 /* FORNOW: We do not support IVs whose evolution function is a polynomial
882 of degree >= 2 or exponential. */
883 gcc_assert (!tree_is_chrec (step_expr));
885 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
887 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
888 fold_convert (TREE_TYPE (step_expr), niters),
889 step_expr);
890 if (POINTER_TYPE_P (type))
891 ni = fold_build_pointer_plus (init_expr, off);
892 else
893 ni = fold_build2 (PLUS_EXPR, type,
894 init_expr, fold_convert (type, off));
896 var = create_tmp_var (type, "tmp");
898 last_gsi = gsi_last_bb (exit_bb);
899 gimple_seq new_stmts = NULL;
900 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
901 /* Exit_bb shouldn't be empty. */
902 if (!gsi_end_p (last_gsi))
903 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
904 else
905 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
907 /* Fix phi expressions in the successor bb. */
908 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
912 /* Function vect_gen_prolog_loop_niters
914 Generate the number of iterations which should be peeled as prolog for the
915 loop represented by LOOP_VINFO. It is calculated as the misalignment of
916 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
917 As a result, after the execution of this loop, the data reference DR will
918 refer to an aligned location. The following computation is generated:
920 If the misalignment of DR is known at compile time:
921 addr_mis = int mis = DR_MISALIGNMENT (dr);
922 Else, compute address misalignment in bytes:
923 addr_mis = addr & (vectype_align - 1)
925 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
927 (elem_size = element type size; an element is the scalar element whose type
928 is the inner type of the vectype)
930 The computations will be emitted at the end of BB. We also compute and
931 store upper bound (included) of the result in BOUND.
933 When the step of the data-ref in the loop is not 1 (as in interleaved data
934 and SLP), the number of iterations of the prolog must be divided by the step
935 (which is equal to the size of interleaved group).
937 The above formulas assume that VF == number of elements in the vector. This
938 may not hold when there are multiple-types in the loop.
939 In this case, for some data-references in the loop the VF does not represent
940 the number of elements that fit in the vector. Therefore, instead of VF we
941 use TYPE_VECTOR_SUBPARTS. */
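/* Illustrative example (hypothetical values): for 4-byte elements, a
   4-element vector (VF = 4, vectype_align = 16) and a start address
   misaligned by addr_mis = 8 bytes, elem_misalign = 8 / 4 = 2 and
   prolog_niters = (4 - 2) & 3 = 2, i.e. two scalar iterations (step 1)
   bring the data reference to a 16-byte boundary.  */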
943 static tree
944 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
945 basic_block bb, int *bound)
947 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
948 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
949 tree var;
950 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
951 gimple_seq stmts = NULL, new_stmts = NULL;
952 tree iters, iters_name;
953 gimple *dr_stmt = DR_STMT (dr);
954 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
956 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
957 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
959 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
961 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE, vect_location,
965 "known peeling = %d.\n", npeel);
967 iters = build_int_cst (niters_type, npeel);
968 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
970 else
972 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
973 tree offset = negative
974 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
975 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
976 &stmts, offset, loop);
977 tree type = unsigned_type_for (TREE_TYPE (start_addr));
978 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
979 HOST_WIDE_INT elem_size =
980 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
981 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
982 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
983 tree nelements_tree = build_int_cst (type, nelements);
984 tree byte_misalign;
985 tree elem_misalign;
987 /* Create: byte_misalign = addr & (vectype_align - 1) */
988 byte_misalign =
989 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
990 vectype_align_minus_1);
992 /* Create: elem_misalign = byte_misalign / element_size */
993 elem_misalign =
994 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
996 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
997 if (negative)
998 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
999 else
1000 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1001 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1002 iters = fold_convert (niters_type, iters);
1003 *bound = nelements - 1;
1006 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE, vect_location,
1009 "niters for prolog loop: ");
1010 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1011 dump_printf (MSG_NOTE, "\n");
1014 var = create_tmp_var (niters_type, "prolog_loop_niters");
1015 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1017 if (new_stmts)
1018 gimple_seq_add_seq (&stmts, new_stmts);
1019 if (stmts)
1021 gcc_assert (single_succ_p (bb));
1022 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1023 if (gsi_end_p (gsi))
1024 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1025 else
1026 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1028 return iters_name;
1032 /* Function vect_update_init_of_dr
1034 NITERS iterations were peeled from LOOP. DR represents a data reference
1035 in LOOP. This function updates the information recorded in DR to
1036 account for the fact that the first NITERS iterations had already been
1037 executed. Specifically, it updates the OFFSET field of DR. */
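/* Illustrative example (hypothetical values): for a data reference with
   DR_STEP 4 (consecutive 4-byte elements), peeling NITERS = 3 iterations
   adds 3 * 4 = 12 bytes to DR_OFFSET.  */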
1039 static void
1040 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1042 tree offset = DR_OFFSET (dr);
1044 niters = fold_build2 (MULT_EXPR, sizetype,
1045 fold_convert (sizetype, niters),
1046 fold_convert (sizetype, DR_STEP (dr)));
1047 offset = fold_build2 (PLUS_EXPR, sizetype,
1048 fold_convert (sizetype, offset), niters);
1049 DR_OFFSET (dr) = offset;
1053 /* Function vect_update_inits_of_drs
1055 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1056 This function updates the information recorded for the data references in
1057 the loop to account for the fact that the first NITERS iterations had
1058 already been executed. Specifically, it updates the initial_condition of
1059 the access_function of all the data_references in the loop. */
1061 static void
1062 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1064 unsigned int i;
1065 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1066 struct data_reference *dr;
1068 if (dump_enabled_p ())
1069 dump_printf_loc (MSG_NOTE, vect_location,
1070 "=== vect_update_inits_of_dr ===\n");
1072 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1073 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1075 gimple_seq seq;
1076 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1077 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1079 niters = fold_convert (sizetype, niters);
1080 niters = force_gimple_operand (niters, &seq, false, var);
1081 if (seq)
1083 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1084 gcc_assert (!new_bb);
1088 FOR_EACH_VEC_ELT (datarefs, i, dr)
1089 vect_update_init_of_dr (dr, niters);
1093 /* This function builds ni_name = number of iterations. Statements
1094 are emitted on the loop preheader edge. */
1096 tree
1097 vect_build_loop_niters (loop_vec_info loop_vinfo)
1099 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1100 if (TREE_CODE (ni) == INTEGER_CST)
1101 return ni;
1102 else
1104 tree ni_name, var;
1105 gimple_seq stmts = NULL;
1106 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1108 var = create_tmp_var (TREE_TYPE (ni), "niters");
1109 ni_name = force_gimple_operand (ni, &stmts, false, var);
1110 if (stmts)
1111 gsi_insert_seq_on_edge_immediate (pe, stmts);
1113 return ni_name;
1117 /* Calculate the number of iterations above which the vectorized loop will be
1118 preferred to the scalar loop. NITERS_PROLOG is the number of iterations
1119 of prolog loop. If it's integer const, the integer number is also passed
1120 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1121 number of iterations of prolog loop. VFM1 is vector factor minus one.
1122 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1123 (rather than vectorized) loop will be executed. This function stores
1124 upper bound (included) of the result in BOUND_SCALAR. */
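/* Illustrative example (hypothetical values): with VFM1 = 3, a constant
   prolog of INT_NITERS_PROLOG = 2 iterations and no profitability check,
   the scalar path runs at most 2 + 3 = 5 iterations, and with
   BOUND_PROLOG = 3 the stored *BOUND_SCALAR is 3 + 3 = 6.  */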
1126 static tree
1127 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1128 int bound_prolog, int vfm1, int th,
1129 int *bound_scalar, bool check_profitability)
1131 tree type = TREE_TYPE (niters_prolog);
1132 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1133 build_int_cst (type, vfm1));
1135 *bound_scalar = vfm1 + bound_prolog;
1136 if (check_profitability)
1138 /* TH indicates the minimum niters of vectorized loop, while we
1139 compute the maximum niters of scalar loop. */
1140 th--;
1141 /* Peeling for constant times. */
1142 if (int_niters_prolog >= 0)
1144 *bound_scalar = (int_niters_prolog + vfm1 < th
1145 ? th
1146 : vfm1 + int_niters_prolog);
1147 return build_int_cst (type, *bound_scalar);
1149 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1150 bound (included) of niters of the prolog loop. */
1151 if (th >= vfm1 + bound_prolog)
1153 *bound_scalar = th;
1154 return build_int_cst (type, th);
1156 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1157 else if (th > vfm1)
1158 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1160 return niters;
1163 /* This function generates the following statements:
1165 niters = number of iterations loop executes (after peeling)
1166 niters_vector = niters / vf
1168 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1169 true if NITERS doesn't overflow. */
1171 void
1172 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1173 tree *niters_vector_ptr, bool niters_no_overflow)
1175 tree ni_minus_gap, var;
1176 tree niters_vector;
1177 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1178 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1179 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1181 /* If epilogue loop is required because of data accesses with gaps, we
1182 subtract one iteration from the total number of iterations here for
1183 correct calculation of RATIO. */
1184 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1186 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1187 niters,
1188 build_one_cst (TREE_TYPE (niters)));
1189 if (!is_gimple_val (ni_minus_gap))
1191 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1192 gimple *stmts = NULL;
1193 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1194 true, var);
1195 gsi_insert_seq_on_edge_immediate (pe, stmts);
1198 else
1199 ni_minus_gap = niters;
1201 /* Create: niters >> log2(vf) */
1202 /* If it's known that niters == number of latch executions + 1 doesn't
1203 overflow, we can generate niters >> log2(vf); otherwise we generate
1204 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1205 will be at least one. */
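/* Illustrative example (hypothetical values): with VF = 4 (log_vf = 2)
   and NITERS = 10, the first form gives 10 >> 2 = 2 and the second form
   gives ((10 - 4) >> 2) + 1 = 2 as well; the second form is used when
   NITERS, computed as the latch count plus one, might wrap around.  */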
1206 if (niters_no_overflow)
1207 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1208 ni_minus_gap, log_vf);
1209 else
1210 niters_vector
1211 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1212 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1213 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1214 ni_minus_gap,
1215 build_int_cst
1216 (TREE_TYPE (niters), vf)),
1217 log_vf),
1218 build_int_cst (TREE_TYPE (niters), 1));
1220 if (!is_gimple_val (niters_vector))
1222 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1223 gimple *stmts = NULL;
1224 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1225 gsi_insert_seq_on_edge_immediate (pe, stmts);
1227 *niters_vector_ptr = niters_vector;
1229 return;
1232 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1233 loop specified by LOOP_VINFO after vectorization, compute the number
1234 of iterations before vectorization (niters_vector * vf) and store it
1235 to NITERS_VECTOR_MULT_VF_PTR. */
1237 static void
1238 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1239 tree niters_vector,
1240 tree *niters_vector_mult_vf_ptr)
1242 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1244 tree type = TREE_TYPE (niters_vector);
1245 tree log_vf = build_int_cst (type, exact_log2 (vf));
1246 basic_block exit_bb = single_exit (loop)->dest;
1248 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1249 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1250 niters_vector, log_vf);
1251 if (!is_gimple_val (niters_vector_mult_vf))
1253 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1254 gimple_seq stmts = NULL;
1255 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1256 &stmts, true, var);
1257 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1258 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1260 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1263 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1264 from SECOND/FIRST and puts it at the original loop's preheader/exit
1265 edge; the two loops are arranged as below:
1267 preheader_a:
1268 first_loop:
1269 header_a:
1270 i_1 = PHI<i_0, i_2>;
1272 i_2 = i_1 + 1;
1273 if (cond_a)
1274 goto latch_a;
1275 else
1276 goto between_bb;
1277 latch_a:
1278 goto header_a;
1280 between_bb:
1281 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1283 second_loop:
1284 header_b:
1285 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1286 or with i_2 if no LCSSA phi is created
1287 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1289 i_4 = i_3 + 1;
1290 if (cond_b)
1291 goto latch_b;
1292 else
1293 goto exit_bb;
1294 latch_b:
1295 goto header_b;
1297 exit_bb:
1299 This function creates loop-closed SSA for the first loop; it updates the
1300 second loop's PHI nodes by replacing the argument on the incoming edge with
1301 the result of the newly created lcssa PHI node. If CREATE_LCSSA_FOR_IV_PHIS
1302 is false, loop-closed SSA phis will only be created for the non-IV phis of
1303 the first loop.
1305 This function assumes the exit bb of the first loop is the preheader bb of
1306 the second loop, i.e., between_bb in the example code. With PHIs updated,
1307 the second loop will execute the remaining iterations of the first. */
1309 static void
1310 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1311 struct loop *first, struct loop *second,
1312 bool create_lcssa_for_iv_phis)
1314 gphi_iterator gsi_update, gsi_orig;
1315 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1317 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1318 edge second_preheader_e = loop_preheader_edge (second);
1319 basic_block between_bb = single_exit (first)->dest;
1321 gcc_assert (between_bb == second_preheader_e->src);
1322 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1323 /* Either the first loop or the second is the loop to be vectorized. */
1324 gcc_assert (loop == first || loop == second);
1326 for (gsi_orig = gsi_start_phis (first->header),
1327 gsi_update = gsi_start_phis (second->header);
1328 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1329 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1331 gphi *orig_phi = gsi_orig.phi ();
1332 gphi *update_phi = gsi_update.phi ();
1334 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1335 /* Generate lcssa PHI node for the first loop. */
1336 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1337 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1339 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1340 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1341 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1342 arg = new_res;
1345 /* Update PHI node in the second loop by replacing arg on the loop's
1346 incoming edge. */
1347 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1351 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1352 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1353 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1354 appear like below:
1356 guard_bb:
1357 if (cond)
1358 goto merge_bb;
1359 else
1360 goto skip_loop;
1362 skip_loop:
1363 header_a:
1364 i_1 = PHI<i_0, i_2>;
1366 i_2 = i_1 + 1;
1367 if (cond_a)
1368 goto latch_a;
1369 else
1370 goto exit_a;
1371 latch_a:
1372 goto header_a;
1374 exit_a:
1375 i_5 = PHI<i_2>;
1377 merge_bb:
1378 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1380 update_loop:
1381 header_b:
1382 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1384 i_4 = i_3 + 1;
1385 if (cond_b)
1386 goto latch_b;
1387 else
1388 goto exit_bb;
1389 latch_b:
1390 goto header_b;
1392 exit_bb:
1394 This function creates PHI nodes at merge_bb and replaces the use of i_5
1395 in the update_loop's PHI node with the result of the new PHI node. */
1397 static void
1398 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1399 struct loop *update_loop,
1400 edge guard_edge, edge merge_edge)
1402 source_location merge_loc, guard_loc;
1403 edge orig_e = loop_preheader_edge (skip_loop);
1404 edge update_e = loop_preheader_edge (update_loop);
1405 gphi_iterator gsi_orig, gsi_update;
1407 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1408 gsi_update = gsi_start_phis (update_loop->header));
1409 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1410 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1412 gphi *orig_phi = gsi_orig.phi ();
1413 gphi *update_phi = gsi_update.phi ();
1415 /* Generate new phi node at merge bb of the guard. */
1416 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1417 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1419 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1420 args in NEW_PHI for these edges. */
1421 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1422 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1423 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1424 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1425 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1426 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1428 /* Update phi in UPDATE_PHI. */
1429 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1433 /* LCSSA_PHI is an lcssa phi of the EPILOG loop, which is copied from LOOP.
1434 This function searches for the corresponding lcssa phi node in the exit
1435 bb of LOOP. If it is found, return the phi result; otherwise return
1436 NULL. */
1438 static tree
1439 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1440 gphi *lcssa_phi)
1442 gphi_iterator gsi;
1443 edge e = single_exit (loop);
1445 gcc_assert (single_pred_p (e->dest));
1446 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1448 gphi *phi = gsi.phi ();
1449 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1450 PHI_ARG_DEF (lcssa_phi, 0), 0))
1451 return PHI_RESULT (phi);
1453 return NULL_TREE;
1456 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1457 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1458 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1459 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1460 The CFG looks like:
1462 loop:
1463 header_a:
1464 i_1 = PHI<i_0, i_2>;
1466 i_2 = i_1 + 1;
1467 if (cond_a)
1468 goto latch_a;
1469 else
1470 goto exit_a;
1471 latch_a:
1472 goto header_a;
1474 exit_a:
1476 guard_bb:
1477 if (cond)
1478 goto merge_bb;
1479 else
1480 goto epilog_loop;
1482 ;; fall_through_bb
1484 epilog_loop:
1485 header_b:
1486 i_3 = PHI<i_2, i_4>;
1488 i_4 = i_3 + 1;
1489 if (cond_b)
1490 goto latch_b;
1491 else
1492 goto merge_bb;
1493 latch_b:
1494 goto header_b;
1496 merge_bb:
1497 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1499 exit_bb:
1500 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1502 For each name used outside EPILOG (i.e. for each name that has an lcssa
1503 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1504 args corresponding to GUARD_EDGE and MERGE_EDGE. The arg for MERGE_EDGE is
1505 the arg of the original PHI in exit_bb; the arg for GUARD_EDGE is defined
1506 by LOOP and is found in the exit bb of LOOP. The arg of the original PHI
1507 in exit_bb will also be updated. */
1509 static void
1510 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1511 edge guard_edge, edge merge_edge)
1513 gphi_iterator gsi;
1514 basic_block merge_bb = guard_edge->dest;
1516 gcc_assert (single_succ_p (merge_bb));
1517 edge e = single_succ_edge (merge_bb);
1518 basic_block exit_bb = e->dest;
1519 gcc_assert (single_pred_p (exit_bb));
1520 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1522 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1524 gphi *update_phi = gsi.phi ();
1525 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1526 /* This loop-closed-phi actually doesn't represent a use out of the
1527 loop - the phi arg is a constant. */
1528 if (TREE_CODE (old_arg) != SSA_NAME)
1529 continue;
1531 tree merge_arg = get_current_def (old_arg);
1532 if (!merge_arg)
1533 merge_arg = old_arg;
1535 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1536 /* If the var is live after loop but not a reduction, we simply
1537 use the old arg. */
1538 if (!guard_arg)
1539 guard_arg = old_arg;
1541 /* Create new phi node in MERGE_BB: */
1542 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1543 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1545 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1546 the two PHI args in merge_phi for these edges. */
1547 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1548 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1550 /* Update the original phi in exit_bb. */
1551 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1555 /* The EPILOG loop is duplicated from the original loop for vectorizing;
1556 the arg of its loop-closed SSA PHI needs to be updated. */
1558 static void
1559 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1561 gphi_iterator gsi;
1562 basic_block exit_bb = single_exit (epilog)->dest;
1564 gcc_assert (single_pred_p (exit_bb));
1565 edge e = EDGE_PRED (exit_bb, 0);
1566 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1567 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1570 /* Function vect_do_peeling.
1572 Input:
1573 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1575 preheader:
1576 LOOP:
1577 header_bb:
1578 loop_body
1579 if (exit_loop_cond) goto exit_bb
1580 else goto header_bb
1581 exit_bb:
1583 - NITERS: The number of iterations of the loop.
1584 - NITERSM1: The number of iterations of the loop's latch.
1585 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1586 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1587 CHECK_PROFITABILITY is true.
1588 Output:
1589 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1591 This function peels prolog and epilog from the loop, adds guards skipping
1592 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1593 would look like:
1595 guard_bb_1:
1596 if (prefer_scalar_loop) goto merge_bb_1
1597 else goto guard_bb_2
1599 guard_bb_2:
1600 if (skip_prolog) goto merge_bb_2
1601 else goto prolog_preheader
1603 prolog_preheader:
1604 PROLOG:
1605 prolog_header_bb:
1606 prolog_body
1607 if (exit_prolog_cond) goto prolog_exit_bb
1608 else goto prolog_header_bb
1609 prolog_exit_bb:
1611 merge_bb_2:
1613 vector_preheader:
1614 VECTOR LOOP:
1615 vector_header_bb:
1616 vector_body
1617 if (exit_vector_cond) goto vector_exit_bb
1618 else goto vector_header_bb
1619 vector_exit_bb:
1621 guard_bb_3:
1622 if (skip_epilog) goto merge_bb_3
1623 else goto epilog_preheader
1625 merge_bb_1:
1627 epilog_preheader:
1628 EPILOG:
1629 epilog_header_bb:
1630 epilog_body
1631 if (exit_epilog_cond) goto merge_bb_3
1632 else goto epilog_header_bb
1634 merge_bb_3:
1636 Note this function peels the prolog and epilog, and adds the guards,
1637 only when necessary.
1638 Returns the created epilogue or NULL.
1640 TODO: Guard for prefer_scalar_loop should be emitted along with
1641 versioning conditions if loop versioning is needed. */
1644 struct loop *
1645 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1646 tree *niters_vector, int th, bool check_profitability,
1647 bool niters_no_overflow)
1649 edge e, guard_e;
1650 tree type = TREE_TYPE (niters), guard_cond;
1651 basic_block guard_bb, guard_to;
1652 int prob_prolog, prob_vector, prob_epilog;
1653 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1654 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1655 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1656 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1657 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1659 if (!prolog_peeling && !epilog_peeling)
1660 return NULL;
1662 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1663 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1664 vf = 3;
1665 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1666 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1668 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1669 struct loop *first_loop = loop;
1670 create_lcssa_for_virtual_phi (loop);
1671 update_ssa (TODO_update_ssa_only_virtuals);
1673 if (MAY_HAVE_DEBUG_STMTS)
1675 gcc_assert (!adjust_vec.exists ());
1676 adjust_vec.create (32);
1678 initialize_original_copy_tables ();
1680 /* Prolog loop may be skipped. */
1681 bool skip_prolog = (prolog_peeling != 0);
1682 /* Skip to epilog if scalar loop may be preferred. It's only used when
1683 we peel for epilog loop. */
1684 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1685 /* Epilog loop must be executed if the number of iterations for epilog
1686 loop is known at compile time, otherwise we need to add a check at
1687 the end of vector loop and skip to the end of epilog loop. */
1688 bool skip_epilog = (prolog_peeling < 0
1689 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1690 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1691 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1692 skip_epilog = false;
1694 /* Record the anchor bb at which guard should be placed if scalar loop
1695 may be preferred. */
1696 basic_block anchor = loop_preheader_edge (loop)->src;
1697 if (skip_vector)
1699 split_edge (loop_preheader_edge (loop));
1701 /* Due to the order in which we peel prolog and epilog, we first
1702 propagate probability to the whole loop. The purpose is to
1703 avoid adjusting probabilities of both prolog and vector loops
1704 separately. Note in this case, the probability of epilog loop
1705 needs to be scaled back later. */
1706 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1707 scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
1708 REG_BR_PROB_BASE);
1709 scale_loop_profile (loop, prob_vector, bound);
1712 tree niters_prolog = build_int_cst (type, 0);
1713 source_location loop_loc = find_loop_location (loop);
1714 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1715 if (prolog_peeling)
1717 e = loop_preheader_edge (loop);
1718 if (!slpeel_can_duplicate_loop_p (loop, e))
1720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1721 "loop can't be duplicated to preheader edge.\n");
1722 gcc_unreachable ();
1724 /* Peel prolog and put it on preheader edge of loop. */
1725 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1726 if (!prolog)
1728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1729 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1730 gcc_unreachable ();
1732 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1733 first_loop = prolog;
1734 reset_original_copy_tables ();
1736 /* Generate and update the number of iterations for prolog loop. */
1737 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1738 &bound_prolog);
1739 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1741 /* Skip the prolog loop. */
1742 if (skip_prolog)
1744 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1745 niters_prolog, build_int_cst (type, 0));
1746 guard_bb = loop_preheader_edge (prolog)->src;
1747 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1748 guard_to = split_edge (loop_preheader_edge (loop));
1749 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1750 guard_to, guard_bb,
1751 inverse_probability (prob_prolog));
1752 e = EDGE_PRED (guard_to, 0);
1753 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1754 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1756 scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
1757 REG_BR_PROB_BASE);
1758 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1760 /* Update init address of DRs. */
1761 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1762 /* Update niters for vector loop. */
1763 LOOP_VINFO_NITERS (loop_vinfo)
1764 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1765 LOOP_VINFO_NITERSM1 (loop_vinfo)
1766 = fold_build2 (MINUS_EXPR, type,
1767 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1768 niters = vect_build_loop_niters (loop_vinfo);
1770 /* Prolog iterates at most bound_prolog times, latch iterates at
1771 most bound_prolog - 1 times. */
1772 record_niter_bound (prolog, bound_prolog - 1, false, true);
1773 delete_update_ssa ();
1774 adjust_vec_debug_stmts ();
1775 scev_reset ();
1778 if (epilog_peeling)
1780 e = single_exit (loop);
1781 if (!slpeel_can_duplicate_loop_p (loop, e))
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1784 "loop can't be duplicated to exit edge.\n");
1785 gcc_unreachable ();
1787 /* Peel epilog and put it on exit edge of loop. */
1788 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1789 if (!epilog)
1791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1792 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1793 gcc_unreachable ();
1795 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1797 /* The scalar version of the loop may be preferred.  In this case, add
1798 a guard and skip to the epilog.  Note this only happens when the number
1799 of iterations of the loop is unknown at compile time, otherwise the
1800 loop won't be vectorized. */
1801 if (skip_vector)
1803 /* An additional epilogue iteration is peeled if a gap exists. */
1804 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1805 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1806 bound_prolog,
1807 peel_for_gaps ? vf : vf - 1,
1808 th, &bound_scalar,
1809 check_profitability);
1810 /* Build guard against NITERSM1 since NITERS may overflow. */
1811 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1812 guard_bb = anchor;
1813 guard_to = split_edge (loop_preheader_edge (epilog));
1814 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1815 guard_to, guard_bb,
1816 inverse_probability (prob_vector));
1817 e = EDGE_PRED (guard_to, 0);
1818 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1819 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1821 /* Simply propagate profile info from guard_bb to guard_to which is
1822 a merge point of control flow. */
1823 guard_to->frequency = guard_bb->frequency;
1824 guard_to->count = guard_bb->count;
1825 single_succ_edge (guard_to)->count = guard_to->count;
1826 /* Scale probability of epilog loop back. */
1827 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
1828 scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
1831 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1832 tree niters_vector_mult_vf;
1833 /* If the loop is peeled a non-zero constant number of times, NITERS now
1834 refers to orig_niters - prolog_peeling; it won't overflow even if
1835 orig_niters overflows. */
1836 niters_no_overflow |= (prolog_peeling > 0);
1837 vect_gen_vector_loop_niters (loop_vinfo, niters,
1838 niters_vector, niters_no_overflow);
1839 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1840 &niters_vector_mult_vf);
1841 /* Update IVs of original loop as if they were advanced by
1842 niters_vector_mult_vf steps. */
1843 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1844 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1845 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1846 update_e);
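   /* Illustrative note (an assumption, not from the original source): for a
      simple induction variable such as i starting at 0 with step 1, the
      value seen by the epilog loop on UPDATE_E becomes niters_vector_mult_vf,
      i.e. the first iteration the vector loop did not handle.  */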
1848 if (skip_epilog)
1850 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1851 niters, niters_vector_mult_vf);
1852 guard_bb = single_exit (loop)->dest;
1853 guard_to = split_edge (single_exit (epilog));
1854 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1855 skip_vector ? anchor : guard_bb,
1856 inverse_probability (prob_epilog));
1857 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1858 single_exit (epilog));
1859 /* We only need to handle the basic block before the epilog loop if it's
1860 not the guard_bb, which is the case when skip_vector is true. */
1861 if (guard_bb != bb_before_epilog)
1863 prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
1864 + inverse_probability (prob_vector));
1866 scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
1867 REG_BR_PROB_BASE);
1869 scale_loop_profile (epilog, prob_epilog, bound);
1871 else
1872 slpeel_update_phi_nodes_for_lcssa (epilog);
1874 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1875 /* We share epilog loop with scalar version loop. */
1876 bound = MAX (bound, bound_scalar - 1);
1877 record_niter_bound (epilog, bound, false, true);
1879 delete_update_ssa ();
1880 adjust_vec_debug_stmts ();
1881 scev_reset ();
1883 adjust_vec.release ();
1884 free_original_copy_tables ();
1886 return epilog;
1889 /* Function vect_create_cond_for_niters_checks.
1891 Create a conditional expression that represents the run-time checks for
1892 loop's niter.  The loop is guaranteed to terminate if the run-time
1893 checks hold.
1895 Input:
1896 COND_EXPR - input conditional expression.  New conditions will be chained
1897 with logical AND operation.  If it is NULL, then the niter
1898 assumptions become the condition.
1899 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
1900 under which the niter information is valid.
1902 Output:
1903 COND_EXPR - conditional expression.
1905 The returned COND_EXPR is the conditional expression to be used in the
1906 if statement that controls which version of the loop gets executed at
1907 runtime. */
1909 static void
1910 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1912 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1914 if (*cond_expr)
1915 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1916 *cond_expr, part_cond_expr);
1917 else
1918 *cond_expr = part_cond_expr;
1921 /* Function vect_create_cond_for_align_checks.
1923 Create a conditional expression that represents the alignment checks for
1924 all of the data references (array element references) whose alignment must be
1925 checked at runtime.
1927 Input:
1928 COND_EXPR - input conditional expression. New conditions will be chained
1929 with logical AND operation.
1930 LOOP_VINFO - two fields of the loop information are used.
1931 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1932 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1934 Output:
1935 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1936 expression.
1937 The returned value is the conditional expression to be used in the if
1938 statement that controls which version of the loop gets executed at runtime.
1940 The algorithm makes two assumptions:
1941 1) The number of bytes "n" in a vector is a power of 2.
1942 2) An address "a" is aligned if a%n is zero, and this
1943 test can be done as a&(n-1) == 0.  For example, for 16
1944 byte vectors the test is a&0xf == 0. */
1946 static void
1947 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1948 tree *cond_expr,
1949 gimple_seq *cond_expr_stmt_list)
1951 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1952 vec<gimple *> may_misalign_stmts
1953 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1954 gimple *ref_stmt;
1955 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1956 tree mask_cst;
1957 unsigned int i;
1958 tree int_ptrsize_type;
1959 char tmp_name[20];
1960 tree or_tmp_name = NULL_TREE;
1961 tree and_tmp_name;
1962 gimple *and_stmt;
1963 tree ptrsize_zero;
1964 tree part_cond_expr;
1966 /* Check that mask is one less than a power of 2, i.e., mask is
1967 all zeros followed by all ones. */
1968 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1970 int_ptrsize_type = signed_type_for (ptr_type_node);
1972 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1973 of the first vector of the i'th data reference. */
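  /* Illustrative example (not part of the original comment): with two data
     references at addresses A1 and A2 and 16-byte vectors (mask == 0xf),
     the statements built below conceptually compute

       or_tmp = (signed intptr) A1 | (signed intptr) A2;
       and_tmp = or_tmp & 0xf;

     and the final condition tests and_tmp == 0.  */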
1975 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1977 gimple_seq new_stmt_list = NULL;
1978 tree addr_base;
1979 tree addr_tmp_name;
1980 tree new_or_tmp_name;
1981 gimple *addr_stmt, *or_stmt;
1982 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1983 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1984 bool negative = tree_int_cst_compare
1985 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1986 tree offset = negative
1987 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1989 /* create: addr_tmp = (int)(address_of_first_vector) */
1990 addr_base =
1991 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1992 offset, loop);
1993 if (new_stmt_list != NULL)
1994 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1996 sprintf (tmp_name, "addr2int%d", i);
1997 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1998 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1999 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2001 /* The addresses are ORed together. */
2003 if (or_tmp_name != NULL_TREE)
2005 /* create: or_tmp = or_tmp | addr_tmp */
2006 sprintf (tmp_name, "orptrs%d", i);
2007 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2008 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2009 or_tmp_name, addr_tmp_name);
2010 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2011 or_tmp_name = new_or_tmp_name;
2013 else
2014 or_tmp_name = addr_tmp_name;
2016 } /* end for i */
2018 mask_cst = build_int_cst (int_ptrsize_type, mask);
2020 /* create: and_tmp = or_tmp & mask */
2021 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2023 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2024 or_tmp_name, mask_cst);
2025 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2027 /* Make and_tmp the left operand of the conditional test against zero.
2028 If and_tmp has a nonzero bit then some address is unaligned. */
2029 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2030 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2031 and_tmp_name, ptrsize_zero);
2032 if (*cond_expr)
2033 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2034 *cond_expr, part_cond_expr);
2035 else
2036 *cond_expr = part_cond_expr;
2039 /* Given two data references and segment lengths described by DR_A and DR_B,
2040 create an expression checking whether the two address ranges intersect
2041 with each other based on the index of the two addresses.  This can only
2042 be done if DR_A and DR_B refer to the same (array) object and the index is
2043 the only difference. For example:
2045 DR_A DR_B
2046 data-ref arr[i] arr[j]
2047 base_object arr arr
2048 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2050 The addresses and their index are like:
2052 |<- ADDR_A ->| |<- ADDR_B ->|
2053 ------------------------------------------------------->
2054 | | | | | | | | | |
2055 ------------------------------------------------------->
2056 i_0 ... i_0+4 j_0 ... j_0+4
2058 We can create an expression based on the index rather than the address:
2060 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2062 Note the evolution step of the index needs to be considered in the comparison. */
2064 static bool
2065 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2066 const dr_with_seg_len& dr_a,
2067 const dr_with_seg_len& dr_b)
2069 if (integer_zerop (DR_STEP (dr_a.dr))
2070 || integer_zerop (DR_STEP (dr_b.dr))
2071 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2072 return false;
2074 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2075 return false;
2077 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2078 return false;
2080 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2081 return false;
2083 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2084 return false;
2086 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2088 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2089 unsigned HOST_WIDE_INT abs_step
2090 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2092 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2093 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2094 /* Infer the number of iterations with which the memory segment is accessed
2095 by DR.  In other words, an alias is checked if the memory segment accessed
2096 by DR_A in some iterations intersects with the memory segment accessed by
2097 DR_B in the same number of iterations.
2098 Note the segment length is a linear function of the number of iterations
2099 with DR_STEP as the coefficient. */
2100 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2101 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
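  /* Worked example (illustrative, not from the original source): with
     seg_len1 == 16 bytes and abs_step == 4 bytes this gives
     niter_len1 == (16 + 4 - 1) / 4 == 4 iterations.  */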
2103 unsigned int i;
2104 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2105 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2107 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2108 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2109 /* Two indices must be the same if they are not scev, or not scev wrt
2110 the current loop being vectorized. */
2111 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2112 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2113 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2114 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2116 if (operand_equal_p (access1, access2, 0))
2117 continue;
2119 return false;
2121 /* The two indices must have the same step. */
2122 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2123 return false;
2125 tree idx_step = CHREC_RIGHT (access1);
2126 /* Index must have const step, otherwise DR_STEP won't be constant. */
2127 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2128 /* Index must evaluate in the same direction as DR. */
2129 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2131 tree min1 = CHREC_LEFT (access1);
2132 tree min2 = CHREC_LEFT (access2);
2133 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2134 return false;
2136 /* Ideally, aliasing could be checked against the loop's control IV, but
2137 we would need to prove a linear mapping between the control IV and the
2138 reference index.  Although that should be true, we check against the
2139 (array) index of the data reference instead.  Like the segment length,
2140 the index length is a linear function of the number of iterations with
2141 idx_step as the coefficient, i.e., niter_len * idx_step. */
2142 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2143 build_int_cst (TREE_TYPE (min1),
2144 niter_len1));
2145 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2146 build_int_cst (TREE_TYPE (min2),
2147 niter_len2));
2148 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2149 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2150 /* Adjust ranges for negative step. */
2151 if (neg_step)
2153 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2154 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2155 CHREC_LEFT (access1), idx_step);
2156 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2157 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2158 CHREC_LEFT (access2), idx_step);
2160 tree part_cond_expr
2161 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2162 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2163 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2164 if (*cond_expr)
2165 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2166 *cond_expr, part_cond_expr);
2167 else
2168 *cond_expr = part_cond_expr;
2170 return true;
2173 /* Given two data references and segment lengths described by DR_A and DR_B,
2174 create an expression checking whether the two address ranges intersect
2175 with each other:
2177 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2178 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
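/* For instance (an illustrative example, not part of the original comment):
   if DR_A covers [A, A + 16) and DR_B covers [B, B + 16), both with positive
   step, the check built below is conceptually
   (A + 16 <= B) || (B + 16 <= A).  */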
2180 static void
2181 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2182 const dr_with_seg_len& dr_a,
2183 const dr_with_seg_len& dr_b)
2185 *cond_expr = NULL_TREE;
2186 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2187 return;
2189 tree segment_length_a = dr_a.seg_len;
2190 tree segment_length_b = dr_b.seg_len;
2191 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2192 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2193 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2195 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2196 offset_a, DR_INIT (dr_a.dr));
2197 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2198 offset_b, DR_INIT (dr_b.dr));
2199 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2200 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2202 tree seg_a_min = addr_base_a;
2203 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2204 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2205 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2206 [a, a+12) */
2207 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2209 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2210 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2211 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2214 tree seg_b_min = addr_base_b;
2215 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2216 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2218 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2219 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2220 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2222 *cond_expr
2223 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2224 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2225 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2228 /* Function vect_create_cond_for_alias_checks.
2230 Create a conditional expression that represents the run-time checks for
2231 overlapping of address ranges represented by a list of data reference
2232 relations passed as input.
2234 Input:
2235 COND_EXPR - input conditional expression.  New conditions will be chained
2236 with logical AND operation.  If it is NULL, then the alias
2237 checks become the condition.
2238 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2239 to be checked.
2241 Output:
2242 COND_EXPR - conditional expression.
2244 The returned COND_EXPR is the conditional expression to be used in the if
2245 statement that controls which version of the loop gets executed at runtime.
2248 void
2249 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2251 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2252 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2253 tree part_cond_expr;
2255 if (comp_alias_ddrs.is_empty ())
2256 return;
2258 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2260 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2261 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2263 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_NOTE, vect_location,
2266 "create runtime check for data references ");
2267 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2268 dump_printf (MSG_NOTE, " and ");
2269 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2270 dump_printf (MSG_NOTE, "\n");
2273 /* Create a condition expression for each pair of data references. */
2274 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2275 if (*cond_expr)
2276 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2277 *cond_expr, part_cond_expr);
2278 else
2279 *cond_expr = part_cond_expr;
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_NOTE, vect_location,
2284 "created %u versioning for alias checks.\n",
2285 comp_alias_ddrs.length ());
2289 /* Function vect_loop_versioning.
2291 If the loop has data references that may or may not be aligned and/or
2292 has data reference relations whose independence was not proven, then
2293 two versions of the loop need to be generated, one which is vectorized
2294 and one which isn't. A test is then generated to control which of the
2295 loops is executed. The test checks for the alignment of all of the
2296 data references that may or may not be aligned. An additional
2297 sequence of runtime tests is generated for each pair of DDRs whose
2298 independence was not proven.  The vectorized version of the loop is
2299 executed only if both alias and alignment tests are passed.
2301 The test generated to check which version of loop is executed
2302 is modified to also check for profitability as indicated by the
2303 cost model threshold TH.
2305 The versioning precondition(s) are placed in *COND_EXPR and
2306 *COND_EXPR_STMT_LIST. */
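/* Illustrative sketch (not part of the original comment): with all checks
   enabled, the condition guarding the vectorized version is conceptually

     scalar_loop_iters > TH
     && niter_assumptions && addresses_aligned && !ranges_may_overlap

   assembled by the vect_create_cond_for_* helpers above and gimplified
   before loop_version is called.  */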
2308 void
2309 vect_loop_versioning (loop_vec_info loop_vinfo,
2310 unsigned int th, bool check_profitability)
2312 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2313 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2314 basic_block condition_bb;
2315 gphi_iterator gsi;
2316 gimple_stmt_iterator cond_exp_gsi;
2317 basic_block merge_bb;
2318 basic_block new_exit_bb;
2319 edge new_exit_e, e;
2320 gphi *orig_phi, *new_phi;
2321 tree cond_expr = NULL_TREE;
2322 gimple_seq cond_expr_stmt_list = NULL;
2323 tree arg;
2324 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2325 gimple_seq gimplify_stmt_list = NULL;
2326 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2327 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2328 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2329 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2331 if (check_profitability)
2332 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2333 build_int_cst (TREE_TYPE (scalar_loop_iters),
2334 th));
2336 if (version_niter)
2337 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2339 if (cond_expr)
2340 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2341 is_gimple_condexpr, NULL_TREE);
2343 if (version_align)
2344 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2345 &cond_expr_stmt_list);
2347 if (version_alias)
2348 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2350 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2351 is_gimple_condexpr, NULL_TREE);
2352 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2354 initialize_original_copy_tables ();
2355 if (scalar_loop)
2357 edge scalar_e;
2358 basic_block preheader, scalar_preheader;
2360 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2361 scale LOOP's frequencies instead. */
2362 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2363 prob, REG_BR_PROB_BASE - prob,
2364 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2365 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2366 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2367 while we need to move it above LOOP's preheader. */
2368 e = loop_preheader_edge (loop);
2369 scalar_e = loop_preheader_edge (scalar_loop);
2370 gcc_assert (empty_block_p (e->src)
2371 && single_pred_p (e->src));
2372 gcc_assert (empty_block_p (scalar_e->src)
2373 && single_pred_p (scalar_e->src));
2374 gcc_assert (single_pred_p (condition_bb));
2375 preheader = e->src;
2376 scalar_preheader = scalar_e->src;
2377 scalar_e = find_edge (condition_bb, scalar_preheader);
2378 e = single_pred_edge (preheader);
2379 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2380 scalar_preheader);
2381 redirect_edge_and_branch_force (scalar_e, preheader);
2382 redirect_edge_and_branch_force (e, condition_bb);
2383 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2384 single_pred (condition_bb));
2385 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2386 single_pred (scalar_preheader));
2387 set_immediate_dominator (CDI_DOMINATORS, preheader,
2388 condition_bb);
2390 else
2391 nloop = loop_version (loop, cond_expr, &condition_bb,
2392 prob, REG_BR_PROB_BASE - prob,
2393 prob, REG_BR_PROB_BASE - prob, true);
2395 if (version_niter)
2397 /* The versioned loop could be infinite; we need to clear the existing
2398 niter information, which was copied from the original loop. */
2399 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2400 vect_free_loop_info_assumptions (nloop);
2401 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2402 loop_constraint_set (loop, LOOP_C_INFINITE);
2405 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2406 && dump_enabled_p ())
2408 if (version_alias)
2409 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2410 "loop versioned for vectorization because of "
2411 "possible aliasing\n");
2412 if (version_align)
2413 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2414 "loop versioned for vectorization to enhance "
2415 "alignment\n");
2418 free_original_copy_tables ();
2420 /* Loop versioning violates an assumption we try to maintain during
2421 vectorization - that the loop exit block has a single predecessor.
2422 After versioning, the exit block of both loop versions is the same
2423 basic block (i.e. it has two predecessors). Just in order to simplify
2424 following transformations in the vectorizer, we fix this situation
2425 here by adding a new (empty) block on the exit-edge of the loop,
2426 with the proper loop-exit phis to maintain loop-closed-form.
2427 If loop versioning wasn't done from loop but from scalar_loop instead,
2428 merge_bb will already have just a single predecessor. */
2430 merge_bb = single_exit (loop)->dest;
2431 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2433 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2434 new_exit_bb = split_edge (single_exit (loop));
2435 new_exit_e = single_exit (loop);
2436 e = EDGE_SUCC (new_exit_bb, 0);
2438 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2440 tree new_res;
2441 orig_phi = gsi.phi ();
2442 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2443 new_phi = create_phi_node (new_res, new_exit_bb);
2444 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2445 add_phi_arg (new_phi, arg, new_exit_e,
2446 gimple_phi_arg_location_from_edge (orig_phi, e));
2447 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2451 /* End loop-exit-fixes after versioning. */
2453 if (cond_expr_stmt_list)
2455 cond_exp_gsi = gsi_last_bb (condition_bb);
2456 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2457 GSI_SAME_STMT);
2459 update_ssa (TODO_update_ssa);