PR c++/79143
[official-gcc.git] / gcc / tree-vect-loop-manip.c
blobf29449c37fdfc272dd2a8ab3ff027d0b229f0b73
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src))
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
111 if (outer_loop->inner->next)
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
120 else
121 continue;
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 struct adjust_info
133 tree from, to;
134 basic_block bb;
137 /* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
143 static vec<adjust_info, va_heap> adjust_vec;
145 /* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
156 gimple *stmt;
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
165 use_operand_p use_p;
166 basic_block bbuse;
168 if (!is_gimple_debug (stmt))
169 continue;
171 gcc_assert (gimple_debug_bind_p (stmt));
173 bbuse = gimple_bb (stmt);
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
192 /* Adjust debug stmts as scheduled before. */
194 static void
195 adjust_vec_debug_stmts (void)
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
200 gcc_assert (adjust_vec.exists ());
202 while (!adjust_vec.is_empty ())
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
209 /* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
217 adjust_info ai;
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
222 && ! virtual_operand_p (from))
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
230 else
231 adjust_debug_stmts_now (&ai);
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one and its limit is NITERS.
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
260 tree indx_before_incr, indx_after_incr;
261 gcond *cond_stmt;
262 gcond *orig_cond;
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
269 source_location loop_loc;
270 enum tree_code code;
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
294 free_stmt_vec_info (orig_cond);
296 loop_loc = find_loop_location (loop);
297 if (dump_enabled_p ())
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For all PHI arguments in FROM->dest and TO->dest from those
312 edges ensure that TO->dest PHI arguments have current_def
313 to that in from. */
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
318 gimple_stmt_iterator gsi_from, gsi_to;
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
330 gsi_next (&gsi_from);
331 continue;
333 if (virtual_operand_p (to_arg))
335 gsi_next (&gsi_to);
336 continue;
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
357 /* Given LOOP this function generates a new copy of it and puts it
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
367 struct loop *new_loop;
368 basic_block *new_bbs, *bbs, *pbbs;
369 bool at_exit;
370 bool was_imm_dom;
371 basic_block exit_dest;
372 edge exit, new_exit;
373 bool duplicate_outer_loop = false;
375 exit = single_exit (loop);
376 at_exit = (e == exit);
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
389 /* Check whether duplication is possible. */
390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
392 free (bbs);
393 return NULL;
396 /* Generate new loop structure. */
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
400 exit_dest = exit->dest;
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
403 true : false);
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409 edge entry_e = single_pred_edge (preheader);
410 bbs[0] = preheader;
411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 &exit, 1, &new_exit, NULL,
416 at_exit ? loop->latch : e->src, true);
417 exit = single_exit (loop);
418 basic_block new_preheader = new_bbs[0];
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
422 if (scalar_loop != loop)
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
433 EDGE_SUCC (loop->latch, 0));
436 if (at_exit) /* Add the loop copy at exit. */
438 if (scalar_loop != loop)
440 gphi_iterator gsi;
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
446 gphi *phi = gsi.phi ();
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457 if (was_imm_dom || duplicate_outer_loop)
458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
464 delete_basic_block (preheader);
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
468 else /* Add the copy at entry. */
470 if (scalar_loop != loop)
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
502 if (scalar_loop != loop)
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
506 gphi_iterator gsi_orig, gsi_new;
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
525 free (new_bbs);
526 free (bbs);
528 checking_verify_dominators (CDI_DOMINATORS);
530 return new_loop;
534 /* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
537 skip the loop. PROBABILITY is the skip edge's probability. */
539 static edge
540 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
541 basic_block guard_to, basic_block dom_bb,
542 int probability)
544 gimple_stmt_iterator gsi;
545 edge new_e, enter_e;
546 gcond *cond_stmt;
547 gimple_seq gimplify_stmt_list = NULL;
549 enter_e = EDGE_SUCC (guard_bb, 0);
550 enter_e->flags &= ~EDGE_FALLTHRU;
551 enter_e->flags |= EDGE_FALSE_VALUE;
552 gsi = gsi_last_bb (guard_bb);
554 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
555 NULL_TREE);
556 if (gimplify_stmt_list)
557 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
559 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
560 gsi = gsi_last_bb (guard_bb);
561 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
563 /* Add new edge to connect guard block to the merge/loop-exit block. */
564 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
566 new_e->count = guard_bb->count;
567 new_e->probability = probability;
568 new_e->count = apply_probability (enter_e->count, probability);
569 enter_e->count -= new_e->count;
570 enter_e->probability = inverse_probability (probability);
571 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
572 return new_e;
576 /* This function verifies that the following restrictions apply to LOOP:
577 (1) it consists of exactly 2 basic blocks - header, and an empty latch
578 for innermost loop and 5 basic blocks for outer-loop.
579 (2) it is single entry, single exit
580 (3) its exit condition is the last stmt in the header
581 (4) E is the entry/exit edge of LOOP.
584 bool
585 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
587 edge exit_e = single_exit (loop);
588 edge entry_e = loop_preheader_edge (loop);
589 gcond *orig_cond = get_loop_exit_condition (loop);
590 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
591 unsigned int num_bb = loop->inner? 5 : 2;
593 /* All loops have an outer scope; the only case loop->outer is NULL is for
594 the function itself. */
595 if (!loop_outer (loop)
596 || loop->num_nodes != num_bb
597 || !empty_block_p (loop->latch)
598 || !single_exit (loop)
599 /* Verify that new loop exit condition can be trivially modified. */
600 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
601 || (e != exit_e && e != entry_e))
602 return false;
604 return true;
607 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
608 in the exit bb and rename all the uses after the loop. This simplifies
609 the *guard[12] routines, which assume loop closed SSA form for all PHIs
610 (but normally loop closed SSA form doesn't require virtual PHIs to be
611 in the same form). Doing this early simplifies the checking what
612 uses should be renamed. */
614 static void
615 create_lcssa_for_virtual_phi (struct loop *loop)
617 gphi_iterator gsi;
618 edge exit_e = single_exit (loop);
620 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
621 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
623 gphi *phi = gsi.phi ();
624 for (gsi = gsi_start_phis (exit_e->dest);
625 !gsi_end_p (gsi); gsi_next (&gsi))
626 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
627 break;
628 if (gsi_end_p (gsi))
630 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
631 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
632 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
633 imm_use_iterator imm_iter;
634 gimple *stmt;
635 use_operand_p use_p;
637 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
638 gimple_phi_set_result (new_phi, new_vop);
639 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
640 if (stmt != new_phi
641 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
642 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
643 SET_USE (use_p, new_vop);
645 break;
650 /* Function vect_get_loop_location.
652 Extract the location of the loop in the source code.
653 If the loop is not well formed for vectorization, an estimated
654 location is calculated.
655 Return the loop location if succeed and NULL if not. */
657 source_location
658 find_loop_location (struct loop *loop)
660 gimple *stmt = NULL;
661 basic_block bb;
662 gimple_stmt_iterator si;
664 if (!loop)
665 return UNKNOWN_LOCATION;
667 stmt = get_loop_exit_condition (loop);
669 if (stmt
670 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
671 return gimple_location (stmt);
673 /* If we got here the loop is probably not "well formed",
674 try to estimate the loop location */
676 if (!loop->header)
677 return UNKNOWN_LOCATION;
679 bb = loop->header;
681 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
683 stmt = gsi_stmt (si);
684 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
685 return gimple_location (stmt);
688 return UNKNOWN_LOCATION;
691 /* Return true if PHI defines an IV of the loop to be vectorized. */
693 static bool
694 iv_phi_p (gphi *phi)
696 if (virtual_operand_p (PHI_RESULT (phi)))
697 return false;
699 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
700 gcc_assert (stmt_info != NULL);
701 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
702 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
703 return false;
705 return true;
708 /* Function vect_can_advance_ivs_p
710 In case the number of iterations that LOOP iterates is unknown at compile
711 time, an epilog loop will be generated, and the loop induction variables
712 (IVs) will be "advanced" to the value they are supposed to take just before
713 the epilog loop. Here we check that the access function of the loop IVs
714 and the expression that represents the loop bound are simple enough.
715 These restrictions will be relaxed in the future. */
717 bool
718 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
720 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
721 basic_block bb = loop->header;
722 gphi_iterator gsi;
724 /* Analyze phi functions of the loop header. */
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
728 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
730 tree evolution_part;
732 gphi *phi = gsi.phi ();
733 if (dump_enabled_p ())
735 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
736 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
739 /* Skip virtual phi's. The data dependences that are associated with
740 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
742 Skip reduction phis. */
743 if (!iv_phi_p (phi))
745 if (dump_enabled_p ())
746 dump_printf_loc (MSG_NOTE, vect_location,
747 "reduc or virtual phi. skip.\n");
748 continue;
751 /* Analyze the evolution function. */
753 evolution_part
754 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
755 if (evolution_part == NULL_TREE)
757 if (dump_enabled_p ())
758 dump_printf (MSG_MISSED_OPTIMIZATION,
759 "No access function or evolution.\n");
760 return false;
763 /* FORNOW: We do not transform initial conditions of IVs
764 which evolution functions are not invariants in the loop. */
766 if (!expr_invariant_in_loop_p (loop, evolution_part))
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770 "evolution not invariant in loop.\n");
771 return false;
774 /* FORNOW: We do not transform initial conditions of IVs
775 which evolution functions are a polynomial of degree >= 2. */
777 if (tree_is_chrec (evolution_part))
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
781 "evolution is chrec.\n");
782 return false;
786 return true;
790 /* Function vect_update_ivs_after_vectorizer.
792 "Advance" the induction variables of LOOP to the value they should take
793 after the execution of LOOP. This is currently necessary because the
794 vectorizer does not handle induction variables that are used after the
795 loop. Such a situation occurs when the last iterations of LOOP are
796 peeled, because:
797 1. We introduced new uses after LOOP for IVs that were not originally used
798 after LOOP: the IVs of LOOP are now used by an epilog loop.
799 2. LOOP is going to be vectorized; this means that it will iterate N/VF
800 times, whereas the loop IVs should be bumped N times.
802 Input:
803 - LOOP - a loop that is going to be vectorized. The last few iterations
804 of LOOP were peeled.
805 - NITERS - the number of iterations that LOOP executes (before it is
806 vectorized). i.e, the number of times the ivs should be bumped.
807 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
808 coming out from LOOP on which there are uses of the LOOP ivs
809 (this is the path from LOOP->exit to epilog_loop->preheader).
811 The new definitions of the ivs are placed in LOOP->exit.
812 The phi args associated with the edge UPDATE_E in the bb
813 UPDATE_E->dest are updated accordingly.
815 Assumption 1: Like the rest of the vectorizer, this function assumes
816 a single loop exit that has a single predecessor.
818 Assumption 2: The phi nodes in the LOOP header and in update_bb are
819 organized in the same order.
821 Assumption 3: The access function of the ivs is simple enough (see
822 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
824 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
825 coming out of LOOP on which the ivs of LOOP are used (this is the path
826 that leads to the epilog loop; other paths skip the epilog loop). This
827 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
828 needs to have its phis updated.
831 static void
832 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
833 tree niters, edge update_e)
835 gphi_iterator gsi, gsi1;
836 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
837 basic_block update_bb = update_e->dest;
838 basic_block exit_bb = single_exit (loop)->dest;
840 /* Make sure there exists a single-predecessor exit bb: */
841 gcc_assert (single_pred_p (exit_bb));
842 gcc_assert (single_succ_edge (exit_bb) == update_e);
844 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
845 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
846 gsi_next (&gsi), gsi_next (&gsi1))
848 tree init_expr;
849 tree step_expr, off;
850 tree type;
851 tree var, ni, ni_name;
852 gimple_stmt_iterator last_gsi;
854 gphi *phi = gsi.phi ();
855 gphi *phi1 = gsi1.phi ();
856 if (dump_enabled_p ())
858 dump_printf_loc (MSG_NOTE, vect_location,
859 "vect_update_ivs_after_vectorizer: phi: ");
860 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
863 /* Skip reduction and virtual phis. */
864 if (!iv_phi_p (phi))
866 if (dump_enabled_p ())
867 dump_printf_loc (MSG_NOTE, vect_location,
868 "reduc or virtual phi. skip.\n");
869 continue;
872 type = TREE_TYPE (gimple_phi_result (phi));
873 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
874 step_expr = unshare_expr (step_expr);
876 /* FORNOW: We do not support IVs whose evolution function is a polynomial
877 of degree >= 2 or exponential. */
878 gcc_assert (!tree_is_chrec (step_expr));
880 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
882 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
883 fold_convert (TREE_TYPE (step_expr), niters),
884 step_expr);
885 if (POINTER_TYPE_P (type))
886 ni = fold_build_pointer_plus (init_expr, off);
887 else
888 ni = fold_build2 (PLUS_EXPR, type,
889 init_expr, fold_convert (type, off));
891 var = create_tmp_var (type, "tmp");
893 last_gsi = gsi_last_bb (exit_bb);
894 gimple_seq new_stmts = NULL;
895 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
896 /* Exit_bb shouldn't be empty. */
897 if (!gsi_end_p (last_gsi))
898 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
899 else
900 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
902 /* Fix phi expressions in the successor bb. */
903 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
907 /* Function vect_gen_prolog_loop_niters
909 Generate the number of iterations which should be peeled as prolog for the
910 loop represented by LOOP_VINFO. It is calculated as the misalignment of
911 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
912 As a result, after the execution of this loop, the data reference DR will
913 refer to an aligned location. The following computation is generated:
915 If the misalignment of DR is known at compile time:
916 addr_mis = int mis = DR_MISALIGNMENT (dr);
917 Else, compute address misalignment in bytes:
918 addr_mis = addr & (vectype_align - 1)
920 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
922 (elem_size = element type size; an element is the scalar element whose type
923 is the inner type of the vectype)
925 The computations will be emitted at the end of BB. We also compute and
926 store upper bound (included) of the result in BOUND.
928 When the step of the data-ref in the loop is not 1 (as in interleaved data
929 and SLP), the number of iterations of the prolog must be divided by the step
930 (which is equal to the size of interleaved group).
932 The above formulas assume that VF == number of elements in the vector. This
933 may not hold when there are multiple-types in the loop.
934 In this case, for some data-references in the loop the VF does not represent
935 the number of elements that fit in the vector. Therefore, instead of VF we
936 use TYPE_VECTOR_SUBPARTS. */
938 static tree
939 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
940 basic_block bb, int *bound)
942 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
943 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
944 tree var;
945 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
946 gimple_seq stmts = NULL, new_stmts = NULL;
947 tree iters, iters_name;
948 gimple *dr_stmt = DR_STMT (dr);
949 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
950 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
951 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
952 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
954 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
956 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
958 if (dump_enabled_p ())
959 dump_printf_loc (MSG_NOTE, vect_location,
960 "known peeling = %d.\n", npeel);
962 iters = build_int_cst (niters_type, npeel);
963 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
965 else
967 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
968 tree offset = negative
969 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
970 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
971 &stmts, offset, loop);
972 tree type = unsigned_type_for (TREE_TYPE (start_addr));
973 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
974 HOST_WIDE_INT elem_size =
975 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
976 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
977 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
978 tree nelements_tree = build_int_cst (type, nelements);
979 tree byte_misalign;
980 tree elem_misalign;
982 /* Create: byte_misalign = addr & (vectype_align - 1) */
983 byte_misalign =
984 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
985 vectype_align_minus_1);
987 /* Create: elem_misalign = byte_misalign / element_size */
988 elem_misalign =
989 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
991 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
992 if (negative)
993 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
994 else
995 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
996 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
997 iters = fold_convert (niters_type, iters);
998 *bound = nelements - 1;
1001 if (dump_enabled_p ())
1003 dump_printf_loc (MSG_NOTE, vect_location,
1004 "niters for prolog loop: ");
1005 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1006 dump_printf (MSG_NOTE, "\n");
1009 var = create_tmp_var (niters_type, "prolog_loop_niters");
1010 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1012 if (new_stmts)
1013 gimple_seq_add_seq (&stmts, new_stmts);
1014 if (stmts)
1016 gcc_assert (single_succ_p (bb));
1017 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1018 if (gsi_end_p (gsi))
1019 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1020 else
1021 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1023 return iters_name;
1027 /* Function vect_update_init_of_dr
1029 NITERS iterations were peeled from LOOP. DR represents a data reference
1030 in LOOP. This function updates the information recorded in DR to
1031 account for the fact that the first NITERS iterations had already been
1032 executed. Specifically, it updates the OFFSET field of DR. */
1034 static void
1035 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1037 tree offset = DR_OFFSET (dr);
1039 niters = fold_build2 (MULT_EXPR, sizetype,
1040 fold_convert (sizetype, niters),
1041 fold_convert (sizetype, DR_STEP (dr)));
1042 offset = fold_build2 (PLUS_EXPR, sizetype,
1043 fold_convert (sizetype, offset), niters);
1044 DR_OFFSET (dr) = offset;
1048 /* Function vect_update_inits_of_drs
1050 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1051 This function updates the information recorded for the data references in
1052 the loop to account for the fact that the first NITERS iterations had
1053 already been executed. Specifically, it updates the initial_condition of
1054 the access_function of all the data_references in the loop. */
1056 static void
1057 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1059 unsigned int i;
1060 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061 struct data_reference *dr;
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "=== vect_update_inits_of_dr ===\n");
1067 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1068 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1070 gimple_seq seq;
1071 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1072 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1074 niters = fold_convert (sizetype, niters);
1075 niters = force_gimple_operand (niters, &seq, false, var);
1076 if (seq)
1078 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1079 gcc_assert (!new_bb);
1083 FOR_EACH_VEC_ELT (datarefs, i, dr)
1084 vect_update_init_of_dr (dr, niters);
1088 /* This function builds ni_name = number of iterations. Statements
1089 are emitted on the loop preheader edge. */
1091 tree
1092 vect_build_loop_niters (loop_vec_info loop_vinfo)
1094 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1095 if (TREE_CODE (ni) == INTEGER_CST)
1096 return ni;
1097 else
1099 tree ni_name, var;
1100 gimple_seq stmts = NULL;
1101 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1103 var = create_tmp_var (TREE_TYPE (ni), "niters");
1104 ni_name = force_gimple_operand (ni, &stmts, false, var);
1105 if (stmts)
1106 gsi_insert_seq_on_edge_immediate (pe, stmts);
1108 return ni_name;
1112 /* Calculate the number of iterations above which vectorized loop will be
1113 preferred than scalar loop. NITERS_PROLOG is the number of iterations
1114 of prolog loop. If it's integer const, the integer number is also passed
1115 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1116 number of iterations of prolog loop. VFM1 is vector factor minus one.
1117 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1118 (rather than vectorized) loop will be executed. This function stores
1119 upper bound (included) of the result in BOUND_SCALAR. */
1121 static tree
1122 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1123 int bound_prolog, int vfm1, int th,
1124 int *bound_scalar, bool check_profitability)
1126 tree type = TREE_TYPE (niters_prolog);
1127 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1128 build_int_cst (type, vfm1));
1130 *bound_scalar = vfm1 + bound_prolog;
1131 if (check_profitability)
1133 /* TH indicates the minimum niters of vectorized loop, while we
1134 compute the maximum niters of scalar loop. */
1135 th--;
1136 /* Peeling for constant times. */
1137 if (int_niters_prolog >= 0)
1139 *bound_scalar = (int_niters_prolog + vfm1 < th
1140 ? th
1141 : vfm1 + int_niters_prolog);
1142 return build_int_cst (type, *bound_scalar);
1144 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1145 bound (inlcuded) of niters of prolog loop. */
1146 if (th >= vfm1 + bound_prolog)
1148 *bound_scalar = th;
1149 return build_int_cst (type, th);
1151 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1152 else if (th > vfm1)
1153 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1155 return niters;
1158 /* This function generates the following statements:
1160 niters = number of iterations loop executes (after peeling)
1161 niters_vector = niters / vf
1163 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1164 true if NITERS doesn't overflow. */
1166 void
1167 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1168 tree *niters_vector_ptr, bool niters_no_overflow)
1170 tree ni_minus_gap, var;
1171 tree niters_vector;
1172 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1174 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1176 /* If epilogue loop is required because of data accesses with gaps, we
1177 subtract one iteration from the total number of iterations here for
1178 correct calculation of RATIO. */
1179 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1181 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1182 niters,
1183 build_one_cst (TREE_TYPE (niters)));
1184 if (!is_gimple_val (ni_minus_gap))
1186 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1187 gimple *stmts = NULL;
1188 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1189 true, var);
1190 gsi_insert_seq_on_edge_immediate (pe, stmts);
1193 else
1194 ni_minus_gap = niters;
1196 /* Create: niters >> log2(vf) */
1197 /* If it's known that niters == number of latch executions + 1 doesn't
1198 overflow, we can generate niters >> log2(vf); otherwise we generate
1199 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1200 will be at least one. */
1201 if (niters_no_overflow)
1202 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1203 ni_minus_gap, log_vf);
1204 else
1205 niters_vector
1206 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1207 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1208 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1209 ni_minus_gap,
1210 build_int_cst
1211 (TREE_TYPE (niters), vf)),
1212 log_vf),
1213 build_int_cst (TREE_TYPE (niters), 1));
1215 if (!is_gimple_val (niters_vector))
1217 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1218 gimple *stmts = NULL;
1219 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1220 gsi_insert_seq_on_edge_immediate (pe, stmts);
1222 *niters_vector_ptr = niters_vector;
1224 return;
1227 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1228 loop specified by LOOP_VINFO after vectorization, compute the number
1229 of iterations before vectorization (niters_vector * vf) and store it
1230 to NITERS_VECTOR_MULT_VF_PTR. */
1232 static void
1233 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1234 tree niters_vector,
1235 tree *niters_vector_mult_vf_ptr)
1237 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1239 tree type = TREE_TYPE (niters_vector);
1240 tree log_vf = build_int_cst (type, exact_log2 (vf));
1241 basic_block exit_bb = single_exit (loop)->dest;
1243 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1244 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1245 niters_vector, log_vf);
1246 if (!is_gimple_val (niters_vector_mult_vf))
1248 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1249 gimple_seq stmts = NULL;
1250 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1251 &stmts, true, var);
1252 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1253 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1255 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1258 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
1259 from SECOND/FIRST and puts it at the original loop's preheader/exit
1260 edge, the two loops are arranged as below:
1262 preheader_a:
1263 first_loop:
1264 header_a:
1265 i_1 = PHI<i_0, i_2>;
1267 i_2 = i_1 + 1;
1268 if (cond_a)
1269 goto latch_a;
1270 else
1271 goto between_bb;
1272 latch_a:
1273 goto header_a;
1275 between_bb:
1276 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1278 second_loop:
1279 header_b:
1280 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1281 or with i_2 if no LCSSA phi is created
1282 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1284 i_4 = i_3 + 1;
1285 if (cond_b)
1286 goto latch_b;
1287 else
1288 goto exit_bb;
1289 latch_b:
1290 goto header_b;
1292 exit_bb:
1294 This function creates loop closed SSA for the first loop; update the
1295 second loop's PHI nodes by replacing argument on incoming edge with the
1296 result of newly created lcssa PHI nodes. IF CREATE_LCSSA_FOR_IV_PHIS
1297 is false, Loop closed ssa phis will only be created for non-iv phis for
1298 the first loop.
1300 This function assumes exit bb of the first loop is preheader bb of the
1301 second loop, i.e, between_bb in the example code. With PHIs updated,
1302 the second loop will execute rest iterations of the first. */
1304 static void
1305 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1306 struct loop *first, struct loop *second,
1307 bool create_lcssa_for_iv_phis)
1309 gphi_iterator gsi_update, gsi_orig;
1310 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1312 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1313 edge second_preheader_e = loop_preheader_edge (second);
1314 basic_block between_bb = single_exit (first)->dest;
1316 gcc_assert (between_bb == second_preheader_e->src);
1317 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1318 /* Either the first loop or the second is the loop to be vectorized. */
1319 gcc_assert (loop == first || loop == second);
1321 for (gsi_orig = gsi_start_phis (first->header),
1322 gsi_update = gsi_start_phis (second->header);
1323 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1324 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1326 gphi *orig_phi = gsi_orig.phi ();
1327 gphi *update_phi = gsi_update.phi ();
1329 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1330 /* Generate lcssa PHI node for the first loop. */
1331 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1332 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1334 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1335 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1336 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1337 arg = new_res;
1340 /* Update PHI node in the second loop by replacing arg on the loop's
1341 incoming edge. */
1342 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1346 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1347 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1348 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1349 appear like below:
1351 guard_bb:
1352 if (cond)
1353 goto merge_bb;
1354 else
1355 goto skip_loop;
1357 skip_loop:
1358 header_a:
1359 i_1 = PHI<i_0, i_2>;
1361 i_2 = i_1 + 1;
1362 if (cond_a)
1363 goto latch_a;
1364 else
1365 goto exit_a;
1366 latch_a:
1367 goto header_a;
1369 exit_a:
1370 i_5 = PHI<i_2>;
1372 merge_bb:
1373 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1375 update_loop:
1376 header_b:
1377 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1379 i_4 = i_3 + 1;
1380 if (cond_b)
1381 goto latch_b;
1382 else
1383 goto exit_bb;
1384 latch_b:
1385 goto header_b;
1387 exit_bb:
1389 This function creates PHI nodes at merge_bb and replaces the use of i_5
1390 in the update_loop's PHI node with the result of new PHI result. */
1392 static void
1393 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1394 struct loop *update_loop,
1395 edge guard_edge, edge merge_edge)
1397 source_location merge_loc, guard_loc;
1398 edge orig_e = loop_preheader_edge (skip_loop);
1399 edge update_e = loop_preheader_edge (update_loop);
1400 gphi_iterator gsi_orig, gsi_update;
1402 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1403 gsi_update = gsi_start_phis (update_loop->header));
1404 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1405 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1407 gphi *orig_phi = gsi_orig.phi ();
1408 gphi *update_phi = gsi_update.phi ();
1410 /* Generate new phi node at merge bb of the guard. */
1411 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1412 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1414 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1415 args in NEW_PHI for these edges. */
1416 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1417 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1418 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1419 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1420 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1421 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1423 /* Update phi in UPDATE_PHI. */
1424 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1428 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1429 this function searches for the corresponding lcssa phi node in exit
1430 bb of LOOP. If it is found, return the phi result; otherwise return
1431 NULL. */
1433 static tree
1434 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1435 gphi *lcssa_phi)
1437 gphi_iterator gsi;
1438 edge e = single_exit (loop);
1440 gcc_assert (single_pred_p (e->dest));
1441 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1443 gphi *phi = gsi.phi ();
1444 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1445 PHI_ARG_DEF (lcssa_phi, 0), 0))
1446 return PHI_RESULT (phi);
1448 return NULL_TREE;
1451 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1452 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1453 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1454 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1455 The CFG looks like:
1457 loop:
1458 header_a:
1459 i_1 = PHI<i_0, i_2>;
1461 i_2 = i_1 + 1;
1462 if (cond_a)
1463 goto latch_a;
1464 else
1465 goto exit_a;
1466 latch_a:
1467 goto header_a;
1469 exit_a:
1471 guard_bb:
1472 if (cond)
1473 goto merge_bb;
1474 else
1475 goto epilog_loop;
1477 ;; fall_through_bb
1479 epilog_loop:
1480 header_b:
1481 i_3 = PHI<i_2, i_4>;
1483 i_4 = i_3 + 1;
1484 if (cond_b)
1485 goto latch_b;
1486 else
1487 goto merge_bb;
1488 latch_b:
1489 goto header_b;
1491 merge_bb:
1492 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1494 exit_bb:
1495 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1497 For each name used out side EPILOG (i.e - for each name that has a lcssa
1498 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1499 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1500 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1501 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1502 in exit_bb will also be updated. */
1504 static void
1505 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1506 edge guard_edge, edge merge_edge)
1508 gphi_iterator gsi;
1509 basic_block merge_bb = guard_edge->dest;
1511 gcc_assert (single_succ_p (merge_bb));
1512 edge e = single_succ_edge (merge_bb);
1513 basic_block exit_bb = e->dest;
1514 gcc_assert (single_pred_p (exit_bb));
1515 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1517 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1519 gphi *update_phi = gsi.phi ();
1520 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1521 /* This loop-closed-phi actually doesn't represent a use out of the
1522 loop - the phi arg is a constant. */
1523 if (TREE_CODE (old_arg) != SSA_NAME)
1524 continue;
1526 tree merge_arg = get_current_def (old_arg);
1527 if (!merge_arg)
1528 merge_arg = old_arg;
1530 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1531 /* If the var is live after loop but not a reduction, we simply
1532 use the old arg. */
1533 if (!guard_arg)
1534 guard_arg = old_arg;
1536 /* Create new phi node in MERGE_BB: */
1537 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1538 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1540 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1541 the two PHI args in merge_phi for these edges. */
1542 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1543 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1545 /* Update the original phi in exit_bb. */
1546 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1550 /* EPILOG loop is duplicated from the original loop for vectorizing,
1551 the arg of its loop closed ssa PHI needs to be updated. */
1553 static void
1554 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1556 gphi_iterator gsi;
1557 basic_block exit_bb = single_exit (epilog)->dest;
1559 gcc_assert (single_pred_p (exit_bb));
1560 edge e = EDGE_PRED (exit_bb, 0);
1561 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1562 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1565 /* Function vect_do_peeling.
1567 Input:
1568 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1570 preheader:
1571 LOOP:
1572 header_bb:
1573 loop_body
1574 if (exit_loop_cond) goto exit_bb
1575 else goto header_bb
1576 exit_bb:
1578 - NITERS: The number of iterations of the loop.
1579 - NITERSM1: The number of iterations of the loop's latch.
1580 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1581 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1582 CHECK_PROFITABILITY is true.
1583 Output:
1584 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1586 This function peels prolog and epilog from the loop, adds guards skipping
1587 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1588 would look like:
1590 guard_bb_1:
1591 if (prefer_scalar_loop) goto merge_bb_1
1592 else goto guard_bb_2
1594 guard_bb_2:
1595 if (skip_prolog) goto merge_bb_2
1596 else goto prolog_preheader
1598 prolog_preheader:
1599 PROLOG:
1600 prolog_header_bb:
1601 prolog_body
1602 if (exit_prolog_cond) goto prolog_exit_bb
1603 else goto prolog_header_bb
1604 prolog_exit_bb:
1606 merge_bb_2:
1608 vector_preheader:
1609 VECTOR LOOP:
1610 vector_header_bb:
1611 vector_body
1612 if (exit_vector_cond) goto vector_exit_bb
1613 else goto vector_header_bb
1614 vector_exit_bb:
1616 guard_bb_3:
1617 if (skip_epilog) goto merge_bb_3
1618 else goto epilog_preheader
1620 merge_bb_1:
1622 epilog_preheader:
1623 EPILOG:
1624 epilog_header_bb:
1625 epilog_body
1626 if (exit_epilog_cond) goto merge_bb_3
1627 else goto epilog_header_bb
1629 merge_bb_3:
1631 Note this function peels prolog and epilog only if it's necessary,
1632 as well as guards.
1633 Returns created epilogue or NULL.
1635 TODO: Guard for prefer_scalar_loop should be emitted along with
1636 versioning conditions if loop versioning is needed. */
1639 struct loop *
1640 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1641 tree *niters_vector, int th, bool check_profitability,
1642 bool niters_no_overflow)
1644 edge e, guard_e;
1645 tree type = TREE_TYPE (niters), guard_cond;
1646 basic_block guard_bb, guard_to;
1647 int prob_prolog, prob_vector, prob_epilog;
1648 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1649 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1650 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1651 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1652 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1654 if (!prolog_peeling && !epilog_peeling)
1655 return NULL;
1657 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1658 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1659 vf = 3;
1660 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1661 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1663 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1664 struct loop *first_loop = loop;
1665 create_lcssa_for_virtual_phi (loop);
1666 update_ssa (TODO_update_ssa_only_virtuals);
1668 if (MAY_HAVE_DEBUG_STMTS)
1670 gcc_assert (!adjust_vec.exists ());
1671 adjust_vec.create (32);
1673 initialize_original_copy_tables ();
1675 /* Prolog loop may be skipped. */
1676 bool skip_prolog = (prolog_peeling != 0);
1677 /* Skip to epilog if scalar loop may be preferred. It's only used when
1678 we peel for epilog loop. */
1679 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1680 /* Epilog loop must be executed if the number of iterations for epilog
1681 loop is known at compile time, otherwise we need to add a check at
1682 the end of vector loop and skip to the end of epilog loop. */
1683 bool skip_epilog = (prolog_peeling < 0
1684 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1685 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1686 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1687 skip_epilog = false;
1689 /* Record the anchor bb at which guard should be placed if scalar loop
1690 may be preferred. */
1691 basic_block anchor = loop_preheader_edge (loop)->src;
1692 if (skip_vector)
1693 split_edge (loop_preheader_edge (loop));
1695 tree niters_prolog = build_int_cst (type, 0);
1696 source_location loop_loc = find_loop_location (loop);
1697 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1698 if (prolog_peeling)
1700 e = loop_preheader_edge (loop);
1701 if (!slpeel_can_duplicate_loop_p (loop, e))
1703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1704 "loop can't be duplicated to preheader edge.\n");
1705 gcc_unreachable ();
1707 /* Peel prolog and put it on preheader edge of loop. */
1708 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1709 if (!prolog)
1711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1712 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1713 gcc_unreachable ();
1715 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1716 first_loop = prolog;
1717 reset_original_copy_tables ();
1719 /* Generate and update the number of iterations for prolog loop. */
1720 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1721 &bound_prolog);
1722 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1724 /* Skip the prolog loop. */
1725 if (skip_prolog)
1727 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1728 niters_prolog, build_int_cst (type, 0));
1729 guard_bb = loop_preheader_edge (prolog)->src;
1730 guard_to = split_edge (loop_preheader_edge (loop));
1731 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1732 guard_to, guard_bb,
1733 inverse_probability (prob_prolog));
1734 e = EDGE_PRED (guard_to, 0);
1735 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1736 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1737 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1739 /* Update init address of DRs. */
1740 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1741 /* Update niters for vector loop. */
1742 LOOP_VINFO_NITERS (loop_vinfo)
1743 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1744 LOOP_VINFO_NITERSM1 (loop_vinfo)
1745 = fold_build2 (MINUS_EXPR, type,
1746 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1747 niters = vect_build_loop_niters (loop_vinfo);
1749 /* Prolog iterates at most bound_prolog times, latch iterates at
1750 most bound_prolog - 1 times. */
1751 record_niter_bound (prolog, bound_prolog - 1, false, true);
1752 delete_update_ssa ();
1753 adjust_vec_debug_stmts ();
1754 scev_reset ();
1757 if (epilog_peeling)
1759 e = single_exit (loop);
1760 if (!slpeel_can_duplicate_loop_p (loop, e))
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1763 "loop can't be duplicated to exit edge.\n");
1764 gcc_unreachable ();
1766 /* Peel epilog and put it on exit edge of loop. */
1767 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1768 if (!epilog)
1770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1771 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1772 gcc_unreachable ();
1774 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1776 /* Scalar version loop may be preferred. In this case, add guard
1777 and skip to epilog. Note this only happens when the number of
1778 iterations of loop is unknown at compile time, otherwise this
1779 won't be vectorized. */
1780 if (skip_vector)
1782 /* Additional epilogue iteration is peeled if gap exists. */
1783 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1784 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1785 bound_prolog,
1786 peel_for_gaps ? vf : vf - 1,
1787 th, &bound_scalar,
1788 check_profitability);
1789 /* Build guard against NITERSM1 since NITERS may overflow. */
1790 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1791 guard_bb = anchor;
1792 guard_to = split_edge (loop_preheader_edge (epilog));
1793 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1794 guard_to, guard_bb,
1795 inverse_probability (prob_vector));
1796 e = EDGE_PRED (guard_to, 0);
1797 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1798 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1799 scale_loop_profile (epilog, prob_vector, bound_scalar);
1802 tree niters_vector_mult_vf;
1803 /* If loop is peeled for non-zero constant times, now niters refers to
1804 orig_niters - prolog_peeling, it won't overflow even the orig_niters
1805 overflows. */
1806 niters_no_overflow |= (prolog_peeling > 0);
1807 vect_gen_vector_loop_niters (loop_vinfo, niters,
1808 niters_vector, niters_no_overflow);
1809 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1810 &niters_vector_mult_vf);
1811 /* Update IVs of original loop as if they were advanced by
1812 niters_vector_mult_vf steps. */
1813 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1814 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1815 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1816 update_e);
1818 if (skip_epilog)
1820 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1821 niters, niters_vector_mult_vf);
1822 guard_bb = single_exit (loop)->dest;
1823 guard_to = split_edge (single_exit (epilog));
1824 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1825 skip_vector ? anchor : guard_bb,
1826 inverse_probability (prob_epilog));
1827 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1828 single_exit (epilog));
1829 scale_loop_profile (epilog, prob_epilog, bound);
1831 else
1832 slpeel_update_phi_nodes_for_lcssa (epilog);
1834 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1835 /* We share epilog loop with scalar version loop. */
1836 bound = MAX (bound, bound_scalar - 1);
1837 record_niter_bound (epilog, bound, false, true);
1839 delete_update_ssa ();
1840 adjust_vec_debug_stmts ();
1841 scev_reset ();
1843 adjust_vec.release ();
1844 free_original_copy_tables ();
1846 return epilog;
1849 /* Function vect_create_cond_for_niters_checks.
1851 Create a conditional expression that represents the run-time checks for
1852 loop's niter. The loop is guaranteed to to terminate if the run-time
1853 checks hold.
1855 Input:
1856 COND_EXPR - input conditional expression. New conditions will be chained
1857 with logical AND operation. If it is NULL, then the function
1858 is used to return the number of alias checks.
1859 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
1860 to be checked.
1862 Output:
1863 COND_EXPR - conditional expression.
1865 The returned COND_EXPR is the conditional expression to be used in the
1866 if statement that controls which version of the loop gets executed at
1867 runtime. */
1869 static void
1870 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1872 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1874 if (*cond_expr)
1875 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1876 *cond_expr, part_cond_expr);
1877 else
1878 *cond_expr = part_cond_expr;
1881 /* Function vect_create_cond_for_align_checks.
1883 Create a conditional expression that represents the alignment checks for
1884 all of data references (array element references) whose alignment must be
1885 checked at runtime.
1887 Input:
1888 COND_EXPR - input conditional expression. New conditions will be chained
1889 with logical AND operation.
1890 LOOP_VINFO - two fields of the loop information are used.
1891 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1892 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1894 Output:
1895 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1896 expression.
1897 The returned value is the conditional expression to be used in the if
1898 statement that controls which version of the loop gets executed at runtime.
1900 The algorithm makes two assumptions:
1901 1) The number of bytes "n" in a vector is a power of 2.
1902 2) An address "a" is aligned if a%n is zero and that this
1903 test can be done as a&(n-1) == 0. For example, for 16
1904 byte vectors the test is a&0xf == 0. */
1906 static void
1907 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1908 tree *cond_expr,
1909 gimple_seq *cond_expr_stmt_list)
1911 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1912 vec<gimple *> may_misalign_stmts
1913 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1914 gimple *ref_stmt;
1915 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1916 tree mask_cst;
1917 unsigned int i;
1918 tree int_ptrsize_type;
1919 char tmp_name[20];
1920 tree or_tmp_name = NULL_TREE;
1921 tree and_tmp_name;
1922 gimple *and_stmt;
1923 tree ptrsize_zero;
1924 tree part_cond_expr;
1926 /* Check that mask is one less than a power of 2, i.e., mask is
1927 all zeros followed by all ones. */
1928 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1930 int_ptrsize_type = signed_type_for (ptr_type_node);
1932 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
1933 of the first vector of the i'th data reference. */
1935 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1937 gimple_seq new_stmt_list = NULL;
1938 tree addr_base;
1939 tree addr_tmp_name;
1940 tree new_or_tmp_name;
1941 gimple *addr_stmt, *or_stmt;
1942 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1943 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1944 bool negative = tree_int_cst_compare
1945 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1946 tree offset = negative
1947 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1949 /* create: addr_tmp = (int)(address_of_first_vector) */
1950 addr_base =
1951 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1952 offset, loop);
1953 if (new_stmt_list != NULL)
1954 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1956 sprintf (tmp_name, "addr2int%d", i);
1957 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1958 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1959 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1961 /* The addresses are OR together. */
1963 if (or_tmp_name != NULL_TREE)
1965 /* create: or_tmp = or_tmp | addr_tmp */
1966 sprintf (tmp_name, "orptrs%d", i);
1967 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1968 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1969 or_tmp_name, addr_tmp_name);
1970 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1971 or_tmp_name = new_or_tmp_name;
1973 else
1974 or_tmp_name = addr_tmp_name;
1976 } /* end for i */
1978 mask_cst = build_int_cst (int_ptrsize_type, mask);
1980 /* create: and_tmp = or_tmp & mask */
1981 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1983 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1984 or_tmp_name, mask_cst);
1985 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1987 /* Make and_tmp the left operand of the conditional test against zero.
1988 if and_tmp has a nonzero bit then some address is unaligned. */
1989 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1990 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1991 and_tmp_name, ptrsize_zero);
1992 if (*cond_expr)
1993 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1994 *cond_expr, part_cond_expr);
1995 else
1996 *cond_expr = part_cond_expr;
1999 /* Given two data references and segment lengths described by DR_A and DR_B,
2000 create expression checking if the two addresses ranges intersect with
2001 each other based on index of the two addresses. This can only be done
2002 if DR_A and DR_B referring to the same (array) object and the index is
2003 the only difference. For example:
2005 DR_A DR_B
2006 data-ref arr[i] arr[j]
2007 base_object arr arr
2008 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2010 The addresses and their index are like:
2012 |<- ADDR_A ->| |<- ADDR_B ->|
2013 ------------------------------------------------------->
2014 | | | | | | | | | |
2015 ------------------------------------------------------->
2016 i_0 ... i_0+4 j_0 ... j_0+4
2018 We can create expression based on index rather than address:
2020 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2022 Note evolution step of index needs to be considered in comparison. */
2024 static bool
2025 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2026 const dr_with_seg_len& dr_a,
2027 const dr_with_seg_len& dr_b)
2029 if (integer_zerop (DR_STEP (dr_a.dr))
2030 || integer_zerop (DR_STEP (dr_b.dr))
2031 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2032 return false;
2034 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2035 return false;
2037 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2038 return false;
2040 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2041 return false;
2043 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2044 return false;
2046 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2048 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2049 unsigned HOST_WIDE_INT abs_step
2050 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2052 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2053 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2054 /* Infer the number of iterations with which the memory segment is accessed
2055 by DR. In other words, alias is checked if memory segment accessed by
2056 DR_A in some iterations intersect with memory segment accessed by DR_B
2057 in the same amount iterations.
2058 Note segnment length is a linear function of number of iterations with
2059 DR_STEP as the coefficient. */
2060 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2061 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
2063 unsigned int i;
2064 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2065 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2067 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2068 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2069 /* Two indices must be the same if they are not scev, or not scev wrto
2070 current loop being vecorized. */
2071 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2072 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2073 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2074 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2076 if (operand_equal_p (access1, access2, 0))
2077 continue;
2079 return false;
2081 /* The two indices must have the same step. */
2082 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2083 return false;
2085 tree idx_step = CHREC_RIGHT (access1);
2086 /* Index must have const step, otherwise DR_STEP won't be constant. */
2087 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2088 /* Index must evaluate in the same direction as DR. */
2089 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2091 tree min1 = CHREC_LEFT (access1);
2092 tree min2 = CHREC_LEFT (access2);
2093 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2094 return false;
2096 /* Ideally, alias can be checked against loop's control IV, but we
2097 need to prove linear mapping between control IV and reference
2098 index. Although that should be true, we check against (array)
2099 index of data reference. Like segment length, index length is
2100 linear function of the number of iterations with index_step as
2101 the coefficient, i.e, niter_len * idx_step. */
2102 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2103 build_int_cst (TREE_TYPE (min1),
2104 niter_len1));
2105 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2106 build_int_cst (TREE_TYPE (min2),
2107 niter_len2));
2108 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2109 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2110 /* Adjust ranges for negative step. */
2111 if (neg_step)
2113 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2114 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2115 CHREC_LEFT (access1), idx_step);
2116 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2117 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2118 CHREC_LEFT (access2), idx_step);
2120 tree part_cond_expr
2121 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2122 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2123 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2124 if (*cond_expr)
2125 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2126 *cond_expr, part_cond_expr);
2127 else
2128 *cond_expr = part_cond_expr;
2130 return true;
2133 /* Given two data references and segment lengths described by DR_A and DR_B,
2134 create expression checking if the two addresses ranges intersect with
2135 each other:
2137 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2138 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
2140 static void
2141 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2142 const dr_with_seg_len& dr_a,
2143 const dr_with_seg_len& dr_b)
2145 *cond_expr = NULL_TREE;
2146 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2147 return;
2149 tree segment_length_a = dr_a.seg_len;
2150 tree segment_length_b = dr_b.seg_len;
2151 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2152 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2153 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2155 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2156 offset_a, DR_INIT (dr_a.dr));
2157 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2158 offset_b, DR_INIT (dr_b.dr));
2159 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2160 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2162 tree seg_a_min = addr_base_a;
2163 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2164 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2165 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2166 [a, a+12) */
2167 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2169 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2170 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2171 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2174 tree seg_b_min = addr_base_b;
2175 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2176 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2178 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2179 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2180 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2182 *cond_expr
2183 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2184 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2185 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2188 /* Function vect_create_cond_for_alias_checks.
2190 Create a conditional expression that represents the run-time checks for
2191 overlapping of address ranges represented by a list of data references
2192 relations passed as input.
2194 Input:
2195 COND_EXPR - input conditional expression. New conditions will be chained
2196 with logical AND operation. If it is NULL, then the function
2197 is used to return the number of alias checks.
2198 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2199 to be checked.
2201 Output:
2202 COND_EXPR - conditional expression.
2204 The returned COND_EXPR is the conditional expression to be used in the if
2205 statement that controls which version of the loop gets executed at runtime.
2208 void
2209 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2211 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2212 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2213 tree part_cond_expr;
2215 if (comp_alias_ddrs.is_empty ())
2216 return;
2218 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2220 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2221 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2223 if (dump_enabled_p ())
2225 dump_printf_loc (MSG_NOTE, vect_location,
2226 "create runtime check for data references ");
2227 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2228 dump_printf (MSG_NOTE, " and ");
2229 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2230 dump_printf (MSG_NOTE, "\n");
2233 /* Create condition expression for each pair data references. */
2234 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2235 if (*cond_expr)
2236 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2237 *cond_expr, part_cond_expr);
2238 else
2239 *cond_expr = part_cond_expr;
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_NOTE, vect_location,
2244 "created %u versioning for alias checks.\n",
2245 comp_alias_ddrs.length ());
2249 /* Function vect_loop_versioning.
2251 If the loop has data references that may or may not be aligned or/and
2252 has data reference relations whose independence was not proven then
2253 two versions of the loop need to be generated, one which is vectorized
2254 and one which isn't. A test is then generated to control which of the
2255 loops is executed. The test checks for the alignment of all of the
2256 data references that may or may not be aligned. An additional
2257 sequence of runtime tests is generated for each pairs of DDRs whose
2258 independence was not proven. The vectorized version of loop is
2259 executed only if both alias and alignment tests are passed.
2261 The test generated to check which version of loop is executed
2262 is modified to also check for profitability as indicated by the
2263 cost model threshold TH.
2265 The versioning precondition(s) are placed in *COND_EXPR and
2266 *COND_EXPR_STMT_LIST. */
2268 void
2269 vect_loop_versioning (loop_vec_info loop_vinfo,
2270 unsigned int th, bool check_profitability)
2272 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2273 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2274 basic_block condition_bb;
2275 gphi_iterator gsi;
2276 gimple_stmt_iterator cond_exp_gsi;
2277 basic_block merge_bb;
2278 basic_block new_exit_bb;
2279 edge new_exit_e, e;
2280 gphi *orig_phi, *new_phi;
2281 tree cond_expr = NULL_TREE;
2282 gimple_seq cond_expr_stmt_list = NULL;
2283 tree arg;
2284 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2285 gimple_seq gimplify_stmt_list = NULL;
2286 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2287 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2288 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2289 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2291 if (check_profitability)
2292 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2293 build_int_cst (TREE_TYPE (scalar_loop_iters),
2294 th));
2296 if (version_niter)
2297 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2299 if (cond_expr)
2300 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2301 is_gimple_condexpr, NULL_TREE);
2303 if (version_align)
2304 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2305 &cond_expr_stmt_list);
2307 if (version_alias)
2308 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2310 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2311 is_gimple_condexpr, NULL_TREE);
2312 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2314 initialize_original_copy_tables ();
2315 if (scalar_loop)
2317 edge scalar_e;
2318 basic_block preheader, scalar_preheader;
2320 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2321 scale LOOP's frequencies instead. */
2322 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2323 prob, REG_BR_PROB_BASE - prob,
2324 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2325 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2326 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2327 while we need to move it above LOOP's preheader. */
2328 e = loop_preheader_edge (loop);
2329 scalar_e = loop_preheader_edge (scalar_loop);
2330 gcc_assert (empty_block_p (e->src)
2331 && single_pred_p (e->src));
2332 gcc_assert (empty_block_p (scalar_e->src)
2333 && single_pred_p (scalar_e->src));
2334 gcc_assert (single_pred_p (condition_bb));
2335 preheader = e->src;
2336 scalar_preheader = scalar_e->src;
2337 scalar_e = find_edge (condition_bb, scalar_preheader);
2338 e = single_pred_edge (preheader);
2339 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2340 scalar_preheader);
2341 redirect_edge_and_branch_force (scalar_e, preheader);
2342 redirect_edge_and_branch_force (e, condition_bb);
2343 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2344 single_pred (condition_bb));
2345 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2346 single_pred (scalar_preheader));
2347 set_immediate_dominator (CDI_DOMINATORS, preheader,
2348 condition_bb);
2350 else
2351 nloop = loop_version (loop, cond_expr, &condition_bb,
2352 prob, REG_BR_PROB_BASE - prob,
2353 prob, REG_BR_PROB_BASE - prob, true);
2355 if (version_niter)
2357 /* The versioned loop could be infinite, we need to clear existing
2358 niter information which is copied from the original loop. */
2359 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2360 vect_free_loop_info_assumptions (nloop);
2361 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2362 loop_constraint_set (loop, LOOP_C_INFINITE);
2365 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2366 && dump_enabled_p ())
2368 if (version_alias)
2369 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2370 "loop versioned for vectorization because of "
2371 "possible aliasing\n");
2372 if (version_align)
2373 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2374 "loop versioned for vectorization to enhance "
2375 "alignment\n");
2378 free_original_copy_tables ();
2380 /* Loop versioning violates an assumption we try to maintain during
2381 vectorization - that the loop exit block has a single predecessor.
2382 After versioning, the exit block of both loop versions is the same
2383 basic block (i.e. it has two predecessors). Just in order to simplify
2384 following transformations in the vectorizer, we fix this situation
2385 here by adding a new (empty) block on the exit-edge of the loop,
2386 with the proper loop-exit phis to maintain loop-closed-form.
2387 If loop versioning wasn't done from loop, but scalar_loop instead,
2388 merge_bb will have already just a single successor. */
2390 merge_bb = single_exit (loop)->dest;
2391 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2393 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2394 new_exit_bb = split_edge (single_exit (loop));
2395 new_exit_e = single_exit (loop);
2396 e = EDGE_SUCC (new_exit_bb, 0);
2398 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2400 tree new_res;
2401 orig_phi = gsi.phi ();
2402 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2403 new_phi = create_phi_node (new_res, new_exit_bb);
2404 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2405 add_phi_arg (new_phi, arg, new_exit_e,
2406 gimple_phi_arg_location_from_edge (orig_phi, e));
2407 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2411 /* End loop-exit-fixes after versioning. */
2413 if (cond_expr_stmt_list)
2415 cond_exp_gsi = gsi_last_bb (condition_bb);
2416 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2417 GSI_SAME_STMT);
2419 update_ssa (TODO_update_ssa);