/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from the outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
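
/* An illustrative scenario (SSA names invented): suppose x_1 is first
   scheduled for substitution by an outer lcssa result x_7, and later by
   x_9 from a PHI nested inside the region x_7 covers.  Popping the
   stack applies the closer x_9 substitution first, within the blocks it
   dominates, and only then the wider x_7 one.  Processing in FIFO order
   instead could rewrite the inner debug uses to x_7 up front, leaving
   nothing for the x_9 entry to substitute.  */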
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
                                     build_int_cst (TREE_TYPE (niters), 1));
}
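
/* Illustrative sketch only (SSA names invented): for a loop whose exit
   is taken on the FALSE branch, the transformation above yields GIMPLE
   along the lines of

     ivtmp_1 = PHI <0(preheader), ivtmp_2(latch)>
     ...
     ivtmp_2 = ivtmp_1 + 1;
     if (ivtmp_2 < niters_3)
       goto latch;
     else
       goto exit;

   so the loop executes exactly NITERS iterations whatever its original
   exit condition computed.  */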
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest coming from edges
   FROM and TO, ensure that the current_def of each TO->dest PHI
   argument is set to the corresponding value from FROM.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = enter_e->count.apply_probability (probability);
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->count -= new_e->count;
  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}
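
/* Resulting shape, as a sketch (illustrative only):

     guard_bb:
       if (cond)
         goto guard_to;       <- new_e (TRUE edge), probability PROBABILITY
       else
         goto enter_e->dest;  <- enter_e (now the FALSE edge)

   with GUARD_TO's immediate dominator set to DOM_BB, and ENTER_E split
   when its destination is a loop header so preheaders are preserved.  */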
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   need to be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are not invariants in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}
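
/* For example (illustrative, names invented): i_1 = PHI <0(preheader),
   i_2(latch)> with i_2 = i_1 + step_3 is advanceable when step_3 is
   loop-invariant, whereas an IV whose evolution part is missing, varies
   inside the loop, or is itself a chrec such as {1, +, 2}_1 (making the
   IV a polynomial of degree 2 overall) is rejected by the checks
   above.  */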
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
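
/* Worked example (names invented): for an IV with initial value init_5
   and loop-invariant step 4, this emits ni = init_5 + niters * 4 in the
   loop's exit bb and rewrites the epilog loop's PHI argument on
   UPDATE_E to that value, so the epilog resumes the IV exactly where
   the peeled-off iterations would have left it.  */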
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store the upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */

static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int target_align = DR_TARGET_ALIGNMENT (dr);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
        ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                              &stmts, offset);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree target_align_minus_1 = build_int_cst (type, target_align - 1);
      HOST_WIDE_INT elem_size
        = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      HOST_WIDE_INT align_in_elems = target_align / elem_size;
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);
      tree misalign_in_bytes;
      tree misalign_in_elems;

      /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
      misalign_in_bytes
        = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                       target_align_minus_1);

      /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
      misalign_in_elems
        = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
                                 & (align_in_elems - 1)).  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
                             align_in_elems_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
                             misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
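
/* Worked example for the unknown-misalignment path (illustrative
   numbers): with target alignment 16 bytes and 4-byte elements,
   align_in_elems is 4.  If the start address is 8 bytes past an
   aligned boundary, misalign_in_elems = 8 >> 2 = 2 and
   iters = (4 - 2) & 3 = 2, i.e. peeling two scalar iterations aligns
   the access; *BOUND is set to 3.  */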
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
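
/* E.g. (illustrative): for a data reference with DR_STEP 4 and 3 peeled
   iterations, DR_OFFSET grows by 4 * 3 = 12 bytes, so DR now describes
   the first access performed by the unpeeled part of the loop.  */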
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
   it to TRUE if a new ssa_var is generated.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        {
          gsi_insert_seq_on_edge_immediate (pe, stmts);
          if (new_var_p != NULL)
            *new_var_p = true;
        }

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred to the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (included) of the number of iterations of the prolog loop.  VFM1
   is the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is
   the threshold below which the scalar (rather than vectorized) loop will
   be executed.  This function stores the upper bound (included) of the
   result in BOUND_SCALAR.  */

static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
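
/* Worked example (illustrative numbers): VFM1 = 3, BOUND_PROLOG = 3,
   TH = 10 with CHECK_PROFITABILITY true, so TH becomes 9 after the
   decrement.  A constant prolog count of 2 gives 2 + 3 = 5 < 9, hence
   *BOUND_SCALAR = 9 and the constant 9 is returned; an unknown prolog
   count also yields the constant 9, since 9 >= 3 + 3.  */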
/* This function generates the following statements:

     niters = number of iterations loop executes (after peeling)
     niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */

void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector, type = TREE_TYPE (niters);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (type, exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
                                  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (type, "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf).  */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, type,
                     fold_build2 (RSHIFT_EXPR, type,
                                  fold_build2 (MINUS_EXPR, type, ni_minus_gap,
                                               build_int_cst (type, vf)),
                                  log_vf),
                     build_int_cst (type, 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
         we set range information to make niters analyzer's life easier.  */
      if (stmts != NULL)
        set_range_info (niters_vector, VR_RANGE,
                        wi::to_wide (build_int_cst (type, 1)),
                        wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
                                                  TYPE_MAX_VALUE (type),
                                                  log_vf)));
    }
  *niters_vector_ptr = niters_vector;

  return;
}
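
/* Worked example (illustrative): with VF = 4 and NITERS = 10, the
   no-overflow form computes 10 >> 2 = 2 vector iterations, leaving two
   scalar iterations for the epilog; the other form computes
   ((10 - 4) >> 2) + 1 = 2 as well, but stays correct in the unsigned
   type even when computing NITERS wrapped around.  */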
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */

static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
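
/* E.g. (illustrative): with VF = 4 and a vector iteration count of 2,
   this emits niters_vector_mult_vf = 2 << 2 = 8 in the exit bb - the
   number of scalar iterations the vector loop covered.  */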
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
                              ;; or with i_2 if no LCSSA phi is created
                              ;; under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; updates the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of the newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute rest iterations of the first.  */

static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

     exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of the new PHI node.  */

static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:

       guard_bb:
         if (cond)
           goto merge_bb;
         else
           goto epilog_loop;

       ;; fall_through_bb

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e. - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */

static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
1584 /* Function vect_do_peeling.
1586 Input:
1587 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1589 preheader:
1590 LOOP:
1591 header_bb:
1592 loop_body
1593 if (exit_loop_cond) goto exit_bb
1594 else goto header_bb
1595 exit_bb:
1597 - NITERS: The number of iterations of the loop.
1598 - NITERSM1: The number of iterations of the loop's latch.
1599 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1600 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1601 CHECK_PROFITABILITY is true.
1602 Output:
1603 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1605 This function peels prolog and epilog from the loop, adds guards skipping
1606 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1607 would look like:
1609 guard_bb_1:
1610 if (prefer_scalar_loop) goto merge_bb_1
1611 else goto guard_bb_2
1613 guard_bb_2:
1614 if (skip_prolog) goto merge_bb_2
1615 else goto prolog_preheader
1617 prolog_preheader:
1618 PROLOG:
1619 prolog_header_bb:
1620 prolog_body
1621 if (exit_prolog_cond) goto prolog_exit_bb
1622 else goto prolog_header_bb
1623 prolog_exit_bb:
1625 merge_bb_2:
1627 vector_preheader:
1628 VECTOR LOOP:
1629 vector_header_bb:
1630 vector_body
1631 if (exit_vector_cond) goto vector_exit_bb
1632 else goto vector_header_bb
1633 vector_exit_bb:
1635 guard_bb_3:
1636 if (skip_epilog) goto merge_bb_3
1637 else goto epilog_preheader
1639 merge_bb_1:
1641 epilog_preheader:
1642 EPILOG:
1643 epilog_header_bb:
1644 epilog_body
1645 if (exit_epilog_cond) goto merge_bb_3
1646 else goto epilog_header_bb
1648 merge_bb_3:
1650 Note this function peels prolog and epilog only if it's necessary,
1651 as well as guards.
1652 Returns created epilogue or NULL.
1654 TODO: Guard for prefer_scalar_loop should be emitted along with
1655 versioning conditions if loop versioning is needed. */
1658 struct loop *
1659 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1660 tree *niters_vector, int th, bool check_profitability,
1661 bool niters_no_overflow)
1663 edge e, guard_e;
1664 tree type = TREE_TYPE (niters), guard_cond;
1665 basic_block guard_bb, guard_to;
1666 profile_probability prob_prolog, prob_vector, prob_epilog;
1667 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1668 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1669 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1670 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1671 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1673 if (!prolog_peeling && !epilog_peeling)
1674 return NULL;
1676 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
1677 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1678 vf = 3;
1679 prob_prolog = prob_epilog = profile_probability::guessed_always ()
1680 .apply_scale (vf - 1, vf);
1681 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1683 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1684 struct loop *first_loop = loop;
1685 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1686 create_lcssa_for_virtual_phi (loop);
1687 update_ssa (TODO_update_ssa_only_virtuals);
1689 if (MAY_HAVE_DEBUG_STMTS)
1691 gcc_assert (!adjust_vec.exists ());
1692 adjust_vec.create (32);
1694 initialize_original_copy_tables ();
1696 /* Prolog loop may be skipped. */
1697 bool skip_prolog = (prolog_peeling != 0);
1698 /* Skip to epilog if scalar loop may be preferred. It's only needed
1699 when we peel for epilog loop and when it hasn't been checked with
1700 loop versioning. */
1701 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1702 && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
1703 /* Epilog loop must be executed if the number of iterations for epilog
1704 loop is known at compile time, otherwise we need to add a check at
1705 the end of vector loop and skip to the end of epilog loop. */
1706 bool skip_epilog = (prolog_peeling < 0
1707 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1708 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1709 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1710 skip_epilog = false;
1712 /* Record the anchor bb at which guard should be placed if scalar loop
1713 may be preferred. */
1714 basic_block anchor = loop_preheader_edge (loop)->src;
1715 if (skip_vector)
1717 split_edge (loop_preheader_edge (loop));
1719 /* Due to the order in which we peel prolog and epilog, we first
1720 propagate probability to the whole loop. The purpose is to
1721 avoid adjusting probabilities of both prolog and vector loops
1722 separately. Note in this case, the probability of epilog loop
1723 needs to be scaled back later. */
1724 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1725 if (prob_vector.initialized_p ())
1726 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1727 scale_loop_profile (loop, prob_vector, bound);
1730 tree niters_prolog = build_int_cst (type, 0);
1731 source_location loop_loc = find_loop_location (loop);
1732 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1733 if (prolog_peeling)
1735 e = loop_preheader_edge (loop);
1736 if (!slpeel_can_duplicate_loop_p (loop, e))
1738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1739 "loop can't be duplicated to preheader edge.\n");
1740 gcc_unreachable ();
1742 /* Peel prolog and put it on preheader edge of loop. */
1743 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1744 if (!prolog)
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1747 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1748 gcc_unreachable ();
1750 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1751 first_loop = prolog;
1752 reset_original_copy_tables ();
1754 /* Generate and update the number of iterations for prolog loop. */
1755 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1756 &bound_prolog);
1757 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1759 /* Skip the prolog loop. */
1760 if (skip_prolog)
1762 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1763 niters_prolog, build_int_cst (type, 0));
1764 guard_bb = loop_preheader_edge (prolog)->src;
1765 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1766 guard_to = split_edge (loop_preheader_edge (loop));
1767 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1768 guard_to, guard_bb,
1769 prob_prolog.invert (),
1770 irred_flag);
1771 e = EDGE_PRED (guard_to, 0);
1772 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1773 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1775 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1776 scale_loop_profile (prolog, prob_prolog, bound_prolog);
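/* At this point the guard branches straight to the vector loop
   whenever NITERS_PROLOG == 0, i.e. whenever the runtime alignment
   computation finds that no prolog iterations are needed.  */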
1778 /* Update init address of DRs. */
1779 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1780 /* Update niters for vector loop. */
1781 LOOP_VINFO_NITERS (loop_vinfo)
1782 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1783 LOOP_VINFO_NITERSM1 (loop_vinfo)
1784 = fold_build2 (MINUS_EXPR, type,
1785 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1786 bool new_var_p = false;
1787 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1788 /* It's guaranteed that the vector loop bound before vectorization is at
1789 least VF, so set range information for the newly generated var. */
1790 if (new_var_p)
1791 set_range_info (niters, VR_RANGE,
1792 wi::to_wide (build_int_cst (type, vf)),
1793 wi::to_wide (TYPE_MAX_VALUE (type)));
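/* For example, peeling 3 prolog iterations off an original count of
   103 leaves the vector loop with NITERS == 100; with VF == 4 and a
   32-bit unsigned niter type the recorded range is [4, 4294967295].  */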
1795 /* The prolog iterates at most bound_prolog times, so its latch iterates
1796 at most bound_prolog - 1 times. */
1797 record_niter_bound (prolog, bound_prolog - 1, false, true);
1798 delete_update_ssa ();
1799 adjust_vec_debug_stmts ();
1800 scev_reset ();
1803 if (epilog_peeling)
1805 e = single_exit (loop);
1806 if (!slpeel_can_duplicate_loop_p (loop, e))
1808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1809 "loop can't be duplicated to exit edge.\n");
1810 gcc_unreachable ();
1812 /* Peel epilog and put it on exit edge of loop. */
1813 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1814 if (!epilog)
1816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1817 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1818 gcc_unreachable ();
1820 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1822 /* The scalar version of the loop may be preferred.  In this case, add
1823 the guard and skip to the epilog.  Note this only happens when the
1824 number of iterations of the loop is unknown at compile time;
1825 otherwise the loop wouldn't have been vectorized. */
1826 if (skip_vector)
1828 /* An additional epilogue iteration is peeled if a gap exists. */
1829 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1830 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1831 bound_prolog,
1832 peel_for_gaps ? vf : vf - 1,
1833 th, &bound_scalar,
1834 check_profitability);
1835 /* Build guard against NITERSM1 since NITERS may overflow. */
1836 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
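/* Guarding on NITERSM1 matters when the original niter expression
   wraps: e.g. for a 32-bit unsigned type and 2^32 iterations, NITERS
   is 0 while NITERSM1 == 0xffffffff still compares correctly with T.  */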
1837 guard_bb = anchor;
1838 guard_to = split_edge (loop_preheader_edge (epilog));
1839 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1840 guard_to, guard_bb,
1841 prob_vector.invert (),
1842 irred_flag);
1843 e = EDGE_PRED (guard_to, 0);
1844 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1845 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1847 /* Simply propagate profile info from guard_bb to guard_to, which is
1848 a merge point of control flow. */
1849 guard_to->frequency = guard_bb->frequency;
1850 guard_to->count = guard_bb->count;
1851 single_succ_edge (guard_to)->count = guard_to->count;
1852 /* Scale the probability of the epilog loop back.
1853 FIXME: We should avoid scaling down and back up.  Profile may
1854 get lost if we scale down to 0. */
1855 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE
1856 / prob_vector.to_reg_br_prob_base ();
1857 basic_block *bbs = get_loop_body (epilog);
1858 scale_bbs_frequencies_int (bbs, epilog->num_nodes, scale_up,
1859 REG_BR_PROB_BASE);
1860 free (bbs);
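/* E.g. with PROB_VECTOR == 9/10 the epilog was scaled down by 9/10
   along with the rest of the loop above, so SCALE_UP works out to
   REG_BR_PROB_BASE * 10 / 9 and the frequencies are multiplied by
   10/9 here, restoring them modulo the rounding noted in the FIXME.  */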
1863 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1864 tree niters_vector_mult_vf;
1865 /* If the loop is peeled a non-zero constant number of times, NITERS now
1866 refers to orig_niters - prolog_peeling; it won't overflow even if
1867 orig_niters overflows. */
1868 niters_no_overflow |= (prolog_peeling > 0);
1869 vect_gen_vector_loop_niters (loop_vinfo, niters,
1870 niters_vector, niters_no_overflow);
1871 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1872 &niters_vector_mult_vf);
1873 /* Update IVs of original loop as if they were advanced by
1874 niters_vector_mult_vf steps. */
1875 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1876 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1877 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1878 update_e);
1880 if (skip_epilog)
1882 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1883 niters, niters_vector_mult_vf);
1884 guard_bb = single_exit (loop)->dest;
1885 guard_to = split_edge (single_exit (epilog));
1886 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1887 skip_vector ? anchor : guard_bb,
1888 prob_epilog.invert (),
1889 irred_flag);
1890 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1891 single_exit (epilog));
1892 /* We only need to handle the basic block before the epilog loop if
1893 it isn't the guard_bb, which is the case when skip_vector is true. */
1894 if (guard_bb != bb_before_epilog)
1896 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
1898 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
1900 scale_loop_profile (epilog, prob_epilog, bound);
1902 else
1903 slpeel_update_phi_nodes_for_lcssa (epilog);
1905 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1906 /* We share the epilog loop with the scalar version of the loop. */
1907 bound = MAX (bound, bound_scalar - 1);
1908 record_niter_bound (epilog, bound, false, true);
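/* E.g. with VF == 4 and no peeling for gaps the epilog handles fewer
   than 4 iterations, so its latch runs at most VF - 2 == 2 times;
   peeling for gaps allows one more iteration, hence VF - 1.  Taking
   the maximum with BOUND_SCALAR - 1 accounts for the shared scalar
   version of the loop.  */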
1910 delete_update_ssa ();
1911 adjust_vec_debug_stmts ();
1912 scev_reset ();
1914 adjust_vec.release ();
1915 free_original_copy_tables ();
1917 return epilog;
1920 /* Function vect_create_cond_for_niters_checks.
1922 Create a conditional expression that represents the run-time checks for
1923 the loop's niter.  The loop is guaranteed to terminate if the run-time
1924 checks hold.
1926 Input:
1927 COND_EXPR - input conditional expression.  The new condition will be
1928 chained with it by a logical AND operation.  If it is NULL,
1929 the new condition is used on its own.
1930 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
1931 under which the loop's niter analysis holds.
1933 Output:
1934 COND_EXPR - conditional expression.
1936 The resulting COND_EXPR is the conditional expression to be used in the
1937 if statement that controls which version of the loop gets executed at
1938 runtime. */
1940 static void
1941 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1943 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1945 if (*cond_expr)
1946 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1947 *cond_expr, part_cond_expr);
1948 else
1949 *cond_expr = part_cond_expr;
1952 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
1953 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
1955 static void
1956 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
1958 if (*cond_expr)
1959 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1960 *cond_expr, part_cond_expr);
1961 else
1962 *cond_expr = part_cond_expr;
1965 /* Function vect_create_cond_for_align_checks.
1967 Create a conditional expression that represents the alignment checks for
1968 all of the data references (array element references) whose alignment
1969 must be checked at runtime.
1971 Input:
1972 COND_EXPR - input conditional expression. New conditions will be chained
1973 with logical AND operation.
1974 LOOP_VINFO - two fields of the loop information are used.
1975 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1976 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1978 Output:
1979 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1980 expression.
1981 The returned value is the conditional expression to be used in the if
1982 statement that controls which version of the loop gets executed at runtime.
1984 The algorithm makes two assumptions:
1985 1) The number of bytes "n" in a vector is a power of 2.
1986 2) An address "a" is aligned if a%n is zero, and this
1987 test can be done as a&(n-1) == 0.  For example, for 16
1988 byte vectors the test is a&0xf == 0. */
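/* As an illustration (the names match the temporaries created below),
   for two data references whose first-vector addresses are A and B,
   and 16-byte vectors, the emitted sequence is conceptually:

     addr2int0 = (signed ptrsize type) A;
     addr2int1 = (signed ptrsize type) B;
     orptrs1 = addr2int0 | addr2int1;
     andmask = orptrs1 & 0xf;

   and the condition contributed to *COND_EXPR is andmask == 0.  */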
1990 static void
1991 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1992 tree *cond_expr,
1993 gimple_seq *cond_expr_stmt_list)
1995 vec<gimple *> may_misalign_stmts
1996 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1997 gimple *ref_stmt;
1998 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1999 tree mask_cst;
2000 unsigned int i;
2001 tree int_ptrsize_type;
2002 char tmp_name[20];
2003 tree or_tmp_name = NULL_TREE;
2004 tree and_tmp_name;
2005 gimple *and_stmt;
2006 tree ptrsize_zero;
2007 tree part_cond_expr;
2009 /* Check that mask is one less than a power of 2, i.e., mask is
2010 all zeros followed by all ones. */
2011 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
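/* E.g. MASK == 0xf passes the check (0xf & 0x10 == 0), whereas
   MASK == 0xe would fail (0xe & 0xf != 0).  */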
2013 int_ptrsize_type = signed_type_for (ptr_type_node);
2015 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2016 of the first vector of the i'th data reference. */
2018 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2020 gimple_seq new_stmt_list = NULL;
2021 tree addr_base;
2022 tree addr_tmp_name;
2023 tree new_or_tmp_name;
2024 gimple *addr_stmt, *or_stmt;
2025 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2026 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2027 bool negative = tree_int_cst_compare
2028 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2029 tree offset = negative
2030 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
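/* For a negative-step access the first vector starts
   TYPE_VECTOR_SUBPARTS - 1 elements before the initial address
   (e.g. an offset of -3 for 4-element vectors), so that lowest
   address is the one whose alignment gets tested.  */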
2032 /* create: addr_tmp = (int)(address_of_first_vector) */
2033 addr_base =
2034 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2035 offset);
2036 if (new_stmt_list != NULL)
2037 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2039 sprintf (tmp_name, "addr2int%d", i);
2040 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2041 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2042 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2044 /* The addresses are ORed together. */
2046 if (or_tmp_name != NULL_TREE)
2048 /* create: or_tmp = or_tmp | addr_tmp */
2049 sprintf (tmp_name, "orptrs%d", i);
2050 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2051 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2052 or_tmp_name, addr_tmp_name);
2053 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2054 or_tmp_name = new_or_tmp_name;
2056 else
2057 or_tmp_name = addr_tmp_name;
2059 } /* end for i */
2061 mask_cst = build_int_cst (int_ptrsize_type, mask);
2063 /* create: and_tmp = or_tmp & mask */
2064 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2066 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2067 or_tmp_name, mask_cst);
2068 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2070 /* Make and_tmp the left operand of the conditional test against zero.
2071 If and_tmp has a nonzero bit then some address is unaligned. */
2072 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2073 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2074 and_tmp_name, ptrsize_zero);
2075 chain_cond_expr (cond_expr, part_cond_expr);
2078 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2079 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2080 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2081 and this new condition are true. Treat a null *COND_EXPR as "true". */
2083 static void
2084 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2086 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2087 unsigned int i;
2088 vec_object_pair *pair;
2089 FOR_EACH_VEC_ELT (pairs, i, pair)
2091 tree addr1 = build_fold_addr_expr (pair->first);
2092 tree addr2 = build_fold_addr_expr (pair->second);
2093 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2094 addr1, addr2);
2095 chain_cond_expr (cond_expr, part_cond_expr);
2099 /* Function vect_create_cond_for_alias_checks.
2101 Create a conditional expression that represents the run-time checks for
2102 overlapping of the address ranges represented by a list of data
2103 reference relations passed as input.
2105 Input:
2106 COND_EXPR - input conditional expression.  New conditions will be chained
2107 with logical AND operation.  If it is NULL, the new
2108 conditions are used on their own.
2109 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddrs
2110 to be checked.
2112 Output:
2113 COND_EXPR - conditional expression.
2115 The returned COND_EXPR is the conditional expression to be used in the if
2116 statement that controls which version of the loop gets executed at runtime.
2119 void
2120 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2122 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2123 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2125 if (comp_alias_ddrs.is_empty ())
2126 return;
2128 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2129 &comp_alias_ddrs, cond_expr);
2130 if (dump_enabled_p ())
2131 dump_printf_loc (MSG_NOTE, vect_location,
2132 "created %u versioning for alias checks.\n",
2133 comp_alias_ddrs.length ());
2137 /* Function vect_loop_versioning.
2139 If the loop has data references that may or may not be aligned and/or
2140 has data reference relations whose independence was not proven, then
2141 two versions of the loop need to be generated, one which is vectorized
2142 and one which isn't.  A test is then generated to control which of the
2143 loops is executed.  The test checks for the alignment of all of the
2144 data references that may or may not be aligned.  An additional
2145 sequence of runtime tests is generated for each pair of DDRs whose
2146 independence was not proven.  The vectorized version of the loop is
2147 executed only if both the alias and alignment tests pass.
2149 The test generated to check which version of the loop is executed
2150 is modified to also check for profitability, as indicated by the
2151 cost model threshold TH.
2153 The versioning precondition(s) are placed in *COND_EXPR and
2154 *COND_EXPR_STMT_LIST. */
2156 void
2157 vect_loop_versioning (loop_vec_info loop_vinfo,
2158 unsigned int th, bool check_profitability)
2160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2161 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2162 basic_block condition_bb;
2163 gphi_iterator gsi;
2164 gimple_stmt_iterator cond_exp_gsi;
2165 basic_block merge_bb;
2166 basic_block new_exit_bb;
2167 edge new_exit_e, e;
2168 gphi *orig_phi, *new_phi;
2169 tree cond_expr = NULL_TREE;
2170 gimple_seq cond_expr_stmt_list = NULL;
2171 tree arg;
2172 profile_probability prob = profile_probability::likely ();
2173 gimple_seq gimplify_stmt_list = NULL;
2174 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2175 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2176 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2177 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2179 if (check_profitability)
2180 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2181 build_int_cst (TREE_TYPE (scalar_loop_iters),
2182 th - 1));
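/* Since SCALAR_LOOP_ITERS is NITERSM1, testing NITERSM1 >= TH - 1 is
   equivalent to requiring at least TH iterations, while staying safe
   in the case where NITERS itself would wrap around to 0.  */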
2184 if (version_niter)
2185 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2187 if (cond_expr)
2188 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2189 is_gimple_condexpr, NULL_TREE);
2191 if (version_align)
2192 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2193 &cond_expr_stmt_list);
2195 if (version_alias)
2197 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
2198 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2201 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2202 is_gimple_condexpr, NULL_TREE);
2203 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2205 initialize_original_copy_tables ();
2206 if (scalar_loop)
2208 edge scalar_e;
2209 basic_block preheader, scalar_preheader;
2211 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2212 scale LOOP's frequencies instead. */
2213 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2214 prob, prob.invert (), prob, prob.invert (), true);
2215 scale_loop_frequencies (loop, prob);
2216 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2217 while we need to move it above LOOP's preheader. */
2218 e = loop_preheader_edge (loop);
2219 scalar_e = loop_preheader_edge (scalar_loop);
2220 gcc_assert (empty_block_p (e->src)
2221 && single_pred_p (e->src));
2222 gcc_assert (empty_block_p (scalar_e->src)
2223 && single_pred_p (scalar_e->src));
2224 gcc_assert (single_pred_p (condition_bb));
2225 preheader = e->src;
2226 scalar_preheader = scalar_e->src;
2227 scalar_e = find_edge (condition_bb, scalar_preheader);
2228 e = single_pred_edge (preheader);
2229 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2230 scalar_preheader);
2231 redirect_edge_and_branch_force (scalar_e, preheader);
2232 redirect_edge_and_branch_force (e, condition_bb);
2233 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2234 single_pred (condition_bb));
2235 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2236 single_pred (scalar_preheader));
2237 set_immediate_dominator (CDI_DOMINATORS, preheader,
2238 condition_bb);
2240 else
2241 nloop = loop_version (loop, cond_expr, &condition_bb,
2242 prob, prob.invert (), prob, prob.invert (), true);
2244 if (version_niter)
2246 /* The versioned loop could be infinite; we need to clear the existing
2247 niter information, which was copied from the original loop. */
2248 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2249 vect_free_loop_info_assumptions (nloop);
2250 /* And set the constraint LOOP_C_INFINITE for the niter analyzer. */
2251 loop_constraint_set (loop, LOOP_C_INFINITE);
2254 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2255 && dump_enabled_p ())
2257 if (version_alias)
2258 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2259 "loop versioned for vectorization because of "
2260 "possible aliasing\n");
2261 if (version_align)
2262 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2263 "loop versioned for vectorization to enhance "
2264 "alignment\n");
2267 free_original_copy_tables ();
2269 /* Loop versioning violates an assumption we try to maintain during
2270 vectorization - that the loop exit block has a single predecessor.
2271 After versioning, the exit block of both loop versions is the same
2272 basic block (i.e. it has two predecessors).  To simplify the
2273 following transformations in the vectorizer, we fix this situation
2274 here by adding a new (empty) block on the exit-edge of the loop,
2275 with the proper loop-exit phis to maintain loop-closed-form.
2276 If loop versioning wasn't done from LOOP, but from SCALAR_LOOP instead,
2277 MERGE_BB will already have just a single predecessor. */
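/* Conceptually:

        LOOP    NLOOP                 LOOP      NLOOP
           \    /          ==>          |         |
          merge_bb                 new_exit_bb    |
                                          \       |
                                           merge_bb

   with NEW_EXIT_BB taking over the loop-exit PHIs of MERGE_BB.  */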
2279 merge_bb = single_exit (loop)->dest;
2280 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2282 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2283 new_exit_bb = split_edge (single_exit (loop));
2284 new_exit_e = single_exit (loop);
2285 e = EDGE_SUCC (new_exit_bb, 0);
2287 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2289 tree new_res;
2290 orig_phi = gsi.phi ();
2291 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2292 new_phi = create_phi_node (new_res, new_exit_bb);
2293 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2294 add_phi_arg (new_phi, arg, new_exit_e,
2295 gimple_phi_arg_location_from_edge (orig_phi, e));
2296 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2300 /* End loop-exit-fixes after versioning. */
2302 if (cond_expr_stmt_list)
2304 cond_exp_gsi = gsi_last_bb (condition_bb);
2305 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2306 GSI_SAME_STMT);
2308 update_ssa (TODO_update_ssa);