2016-08-24 Michael Collison <michael.collison@linaro.org>
[official-gcc.git] / gcc / tree-vect-loop-manip.c
blob01d6bb1e1bc94cab47fe305f055a6cc287a8ab98
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI argumnets
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src)
106 && (!rename_from_outer_loop || e->src != outer_loop->header))
107 continue;
108 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
109 gsi_next (&gsi))
110 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
115 struct adjust_info
117 tree from, to;
118 basic_block bb;
121 /* A stack of values to be adjusted in debug stmts. We have to
122 process them LIFO, so that the closest substitution applies. If we
123 processed them FIFO, without the stack, we might substitute uses
124 with a PHI DEF that would soon become non-dominant, and when we got
125 to the suitable one, it wouldn't have anything to substitute any
126 more. */
127 static vec<adjust_info, va_heap> adjust_vec;
129 /* Adjust any debug stmts that referenced AI->from values to use the
130 loop-closed AI->to, if the references are dominated by AI->bb and
131 not by the definition of AI->from. */
133 static void
134 adjust_debug_stmts_now (adjust_info *ai)
136 basic_block bbphi = ai->bb;
137 tree orig_def = ai->from;
138 tree new_def = ai->to;
139 imm_use_iterator imm_iter;
140 gimple *stmt;
141 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
143 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
145 /* Adjust any debug stmts that held onto non-loop-closed
146 references. */
147 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
149 use_operand_p use_p;
150 basic_block bbuse;
152 if (!is_gimple_debug (stmt))
153 continue;
155 gcc_assert (gimple_debug_bind_p (stmt));
157 bbuse = gimple_bb (stmt);
159 if ((bbuse == bbphi
160 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
161 && !(bbuse == bbdef
162 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
164 if (new_def)
165 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
166 SET_USE (use_p, new_def);
167 else
169 gimple_debug_bind_reset_value (stmt);
170 update_stmt (stmt);
176 /* Adjust debug stmts as scheduled before. */
178 static void
179 adjust_vec_debug_stmts (void)
181 if (!MAY_HAVE_DEBUG_STMTS)
182 return;
184 gcc_assert (adjust_vec.exists ());
186 while (!adjust_vec.is_empty ())
188 adjust_debug_stmts_now (&adjust_vec.last ());
189 adjust_vec.pop ();
192 adjust_vec.release ();
195 /* Adjust any debug stmts that referenced FROM values to use the
196 loop-closed TO, if the references are dominated by BB and not by
197 the definition of FROM. If adjust_vec is non-NULL, adjustments
198 will be postponed until adjust_vec_debug_stmts is called. */
200 static void
201 adjust_debug_stmts (tree from, tree to, basic_block bb)
203 adjust_info ai;
205 if (MAY_HAVE_DEBUG_STMTS
206 && TREE_CODE (from) == SSA_NAME
207 && ! SSA_NAME_IS_DEFAULT_DEF (from)
208 && ! virtual_operand_p (from))
210 ai.from = from;
211 ai.to = to;
212 ai.bb = bb;
214 if (adjust_vec.exists ())
215 adjust_vec.safe_push (ai);
216 else
217 adjust_debug_stmts_now (&ai);
221 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
222 to adjust any debug stmts that referenced the old phi arg,
223 presumably non-loop-closed references left over from other
224 transformations. */
226 static void
227 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
229 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
231 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
233 if (MAY_HAVE_DEBUG_STMTS)
234 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
235 gimple_bb (update_phi));
239 /* Update PHI nodes for a guard of the LOOP.
241 Input:
242 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
243 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
244 originates from the guard-bb, skips LOOP and reaches the (unique) exit
245 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
246 We denote this bb NEW_MERGE_BB because before the guard code was added
247 it had a single predecessor (the LOOP header), and now it became a merge
248 point of two paths - the path that ends with the LOOP exit-edge, and
249 the path that ends with GUARD_EDGE.
250 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
251 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
253 ===> The CFG before the guard-code was added:
254 LOOP_header_bb:
255 loop_body
256 if (exit_loop) goto update_bb
257 else goto LOOP_header_bb
258 update_bb:
260 ==> The CFG after the guard-code was added:
261 guard_bb:
262 if (LOOP_guard_condition) goto new_merge_bb
263 else goto LOOP_header_bb
264 LOOP_header_bb:
265 loop_body
266 if (exit_loop_condition) goto new_merge_bb
267 else goto LOOP_header_bb
268 new_merge_bb:
269 goto update_bb
270 update_bb:
272 ==> The CFG after this function:
273 guard_bb:
274 if (LOOP_guard_condition) goto new_merge_bb
275 else goto LOOP_header_bb
276 LOOP_header_bb:
277 loop_body
278 if (exit_loop_condition) goto new_exit_bb
279 else goto LOOP_header_bb
280 new_exit_bb:
281 new_merge_bb:
282 goto update_bb
283 update_bb:
285 This function:
286 1. creates and updates the relevant phi nodes to account for the new
287 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
288 1.1. Create phi nodes at NEW_MERGE_BB.
289 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
290 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
291 2. preserves loop-closed-ssa-form by creating the required phi nodes
292 at the exit of LOOP (i.e, in NEW_EXIT_BB).
294 There are two flavors to this function:
296 slpeel_update_phi_nodes_for_guard1:
297 Here the guard controls whether we enter or skip LOOP, where LOOP is a
298 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
299 for variables that have phis in the loop header.
301 slpeel_update_phi_nodes_for_guard2:
302 Here the guard controls whether we enter or skip LOOP, where LOOP is an
303 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
304 for variables that have phis in the loop exit.
306 I.E., the overall structure is:
308 loop1_preheader_bb:
309 guard1 (goto loop1/merge1_bb)
310 loop1
311 loop1_exit_bb:
312 guard2 (goto merge1_bb/merge2_bb)
313 merge1_bb
314 loop2
315 loop2_exit_bb
316 merge2_bb
317 next_bb
319 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
320 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
321 that have phis in loop1->header).
323 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
324 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
325 that have phis in next_bb). It also adds some of these phis to
326 loop1_exit_bb.
328 slpeel_update_phi_nodes_for_guard1 is always called before
329 slpeel_update_phi_nodes_for_guard2. They are both needed in order
330 to create correct data-flow and loop-closed-ssa-form.
332 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
333 that change between iterations of a loop (and therefore have a phi-node
334 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
335 phis for variables that are used out of the loop (and therefore have
336 loop-closed exit phis). Some variables may be both updated between
337 iterations and used after the loop. This is why in loop1_exit_bb we
338 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
339 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
341 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
342 an original loop. i.e., we have:
344 orig_loop
345 guard_bb (goto LOOP/new_merge)
346 new_loop <-- LOOP
347 new_exit
348 new_merge
349 next_bb
351 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
352 have:
354 new_loop
355 guard_bb (goto LOOP/new_merge)
356 orig_loop <-- LOOP
357 new_exit
358 new_merge
359 next_bb
361 The SSA names defined in the original loop have a current
362 reaching definition that records the corresponding new ssa-name
363 used in the new duplicated loop copy.
366 /* Function slpeel_update_phi_nodes_for_guard1
368 Input:
369 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
370 - DEFS - a bitmap of ssa names to mark new names for which we recorded
371 information.
373 In the context of the overall structure, we have:
375 loop1_preheader_bb:
376 guard1 (goto loop1/merge1_bb)
377 LOOP-> loop1
378 loop1_exit_bb:
379 guard2 (goto merge1_bb/merge2_bb)
380 merge1_bb
381 loop2
382 loop2_exit_bb
383 merge2_bb
384 next_bb
386 For each name updated between loop iterations (i.e - for each name that has
387 an entry (loop-header) phi in LOOP) we create a new phi in:
388 1. merge1_bb (to account for the edge from guard1)
389 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
392 static void
393 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
394 bool is_new_loop, basic_block *new_exit_bb)
396 gphi *orig_phi, *new_phi;
397 gphi *update_phi, *update_phi2;
398 tree guard_arg, loop_arg;
399 basic_block new_merge_bb = guard_edge->dest;
400 edge e = EDGE_SUCC (new_merge_bb, 0);
401 basic_block update_bb = e->dest;
402 basic_block orig_bb = loop->header;
403 edge new_exit_e;
404 tree current_new_name;
405 gphi_iterator gsi_orig, gsi_update;
407 /* Create new bb between loop and new_merge_bb. */
408 *new_exit_bb = split_edge (single_exit (loop));
410 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
412 for (gsi_orig = gsi_start_phis (orig_bb),
413 gsi_update = gsi_start_phis (update_bb);
414 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
415 gsi_next (&gsi_orig), gsi_next (&gsi_update))
417 source_location loop_locus, guard_locus;
418 tree new_res;
419 orig_phi = gsi_orig.phi ();
420 update_phi = gsi_update.phi ();
422 /** 1. Handle new-merge-point phis **/
424 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
425 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
426 new_phi = create_phi_node (new_res, new_merge_bb);
428 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
429 of LOOP. Set the two phi args in NEW_PHI for these edges: */
430 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
431 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
432 EDGE_SUCC (loop->latch,
433 0));
434 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
435 guard_locus
436 = gimple_phi_arg_location_from_edge (orig_phi,
437 loop_preheader_edge (loop));
439 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
440 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
442 /* 1.3. Update phi in successor block. */
443 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
444 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
445 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
446 update_phi2 = new_phi;
449 /** 2. Handle loop-closed-ssa-form phis **/
451 if (virtual_operand_p (PHI_RESULT (orig_phi)))
452 continue;
454 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
455 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
456 new_phi = create_phi_node (new_res, *new_exit_bb);
458 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
459 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
461 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
462 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
463 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
464 PHI_RESULT (new_phi));
466 /* 2.4. Record the newly created name with set_current_def.
467 We want to find a name such that
468 name = get_current_def (orig_loop_name)
469 and to set its current definition as follows:
470 set_current_def (name, new_phi_name)
472 If LOOP is a new loop then loop_arg is already the name we're
473 looking for. If LOOP is the original loop, then loop_arg is
474 the orig_loop_name and the relevant name is recorded in its
475 current reaching definition. */
476 if (is_new_loop)
477 current_new_name = loop_arg;
478 else
480 current_new_name = get_current_def (loop_arg);
481 /* current_def is not available only if the variable does not
482 change inside the loop, in which case we also don't care
483 about recording a current_def for it because we won't be
484 trying to create loop-exit-phis for it. */
485 if (!current_new_name)
486 continue;
488 tree new_name = get_current_def (current_new_name);
489 /* Because of peeled_chrec optimization it is possible that we have
490 set this earlier. Verify the PHI has the same value. */
491 if (new_name)
493 gimple *phi = SSA_NAME_DEF_STMT (new_name);
494 gcc_assert (gimple_code (phi) == GIMPLE_PHI
495 && gimple_bb (phi) == *new_exit_bb
496 && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
497 == loop_arg));
498 continue;
501 set_current_def (current_new_name, PHI_RESULT (new_phi));
506 /* Function slpeel_update_phi_nodes_for_guard2
508 Input:
509 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
511 In the context of the overall structure, we have:
513 loop1_preheader_bb:
514 guard1 (goto loop1/merge1_bb)
515 loop1
516 loop1_exit_bb:
517 guard2 (goto merge1_bb/merge2_bb)
518 merge1_bb
519 LOOP-> loop2
520 loop2_exit_bb
521 merge2_bb
522 next_bb
524 For each name used out side the loop (i.e - for each name that has an exit
525 phi in next_bb) we create a new phi in:
526 1. merge2_bb (to account for the edge from guard_bb)
527 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
528 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
529 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
532 static void
533 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
534 bool is_new_loop, basic_block *new_exit_bb)
536 gphi *orig_phi, *new_phi;
537 gphi *update_phi, *update_phi2;
538 tree guard_arg, loop_arg;
539 basic_block new_merge_bb = guard_edge->dest;
540 edge e = EDGE_SUCC (new_merge_bb, 0);
541 basic_block update_bb = e->dest;
542 edge new_exit_e;
543 tree orig_def, orig_def_new_name;
544 tree new_name, new_name2;
545 tree arg;
546 gphi_iterator gsi;
548 /* Create new bb between loop and new_merge_bb. */
549 *new_exit_bb = split_edge (single_exit (loop));
551 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
553 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
555 tree new_res;
556 update_phi = gsi.phi ();
557 orig_phi = update_phi;
558 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
559 /* This loop-closed-phi actually doesn't represent a use
560 out of the loop - the phi arg is a constant. */
561 if (TREE_CODE (orig_def) != SSA_NAME)
562 continue;
563 orig_def_new_name = get_current_def (orig_def);
564 arg = NULL_TREE;
566 /** 1. Handle new-merge-point phis **/
568 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
569 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
570 new_phi = create_phi_node (new_res, new_merge_bb);
572 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
573 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
574 new_name = orig_def;
575 new_name2 = NULL_TREE;
576 if (orig_def_new_name)
578 new_name = orig_def_new_name;
579 /* Some variables have both loop-entry-phis and loop-exit-phis.
580 Such variables were given yet newer names by phis placed in
581 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
582 new_name2 = get_current_def (get_current_def (orig_name)). */
583 new_name2 = get_current_def (new_name);
586 if (is_new_loop)
588 guard_arg = orig_def;
589 loop_arg = new_name;
591 else
593 guard_arg = new_name;
594 loop_arg = orig_def;
596 if (new_name2)
597 guard_arg = new_name2;
599 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
600 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
602 /* 1.3. Update phi in successor block. */
603 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
604 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
605 update_phi2 = new_phi;
608 /** 2. Handle loop-closed-ssa-form phis **/
610 if (virtual_operand_p (PHI_RESULT (orig_phi)))
611 continue;
613 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
614 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
615 new_phi = create_phi_node (new_res, *new_exit_bb);
617 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
618 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
620 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
621 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
622 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
623 PHI_RESULT (new_phi));
626 /** 3. Handle loop-closed-ssa-form phis for first loop **/
628 /* 3.1. Find the relevant names that need an exit-phi in
629 GUARD_BB, i.e. names for which
630 slpeel_update_phi_nodes_for_guard1 had not already created a
631 phi node. This is the case for names that are used outside
632 the loop (and therefore need an exit phi) but are not updated
633 across loop iterations (and therefore don't have a
634 loop-header-phi).
636 slpeel_update_phi_nodes_for_guard1 is responsible for
637 creating loop-exit phis in GUARD_BB for names that have a
638 loop-header-phi. When such a phi is created we also record
639 the new name in its current definition. If this new name
640 exists, then guard_arg was set to this new name (see 1.2
641 above). Therefore, if guard_arg is not this new name, this
642 is an indication that an exit-phi in GUARD_BB was not yet
643 created, so we take care of it here. */
644 if (guard_arg == new_name2)
645 continue;
646 arg = guard_arg;
648 /* 3.2. Generate new phi node in GUARD_BB: */
649 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
650 new_phi = create_phi_node (new_res, guard_edge->src);
652 /* 3.3. GUARD_BB has one incoming edge: */
653 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
654 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
655 UNKNOWN_LOCATION);
657 /* 3.4. Update phi in successor of GUARD_BB: */
658 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
659 == guard_arg);
660 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
661 PHI_RESULT (new_phi));
666 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
667 that starts at zero, increases by one and its limit is NITERS.
669 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
671 void
672 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
674 tree indx_before_incr, indx_after_incr;
675 gcond *cond_stmt;
676 gcond *orig_cond;
677 edge exit_edge = single_exit (loop);
678 gimple_stmt_iterator loop_cond_gsi;
679 gimple_stmt_iterator incr_gsi;
680 bool insert_after;
681 tree init = build_int_cst (TREE_TYPE (niters), 0);
682 tree step = build_int_cst (TREE_TYPE (niters), 1);
683 source_location loop_loc;
684 enum tree_code code;
686 orig_cond = get_loop_exit_condition (loop);
687 gcc_assert (orig_cond);
688 loop_cond_gsi = gsi_for_stmt (orig_cond);
690 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
691 create_iv (init, step, NULL_TREE, loop,
692 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
694 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
695 true, NULL_TREE, true,
696 GSI_SAME_STMT);
697 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
698 true, GSI_SAME_STMT);
700 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
701 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
702 NULL_TREE);
704 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
706 /* Remove old loop exit test: */
707 gsi_remove (&loop_cond_gsi, true);
708 free_stmt_vec_info (orig_cond);
710 loop_loc = find_loop_location (loop);
711 if (dump_enabled_p ())
713 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
714 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
715 LOCATION_LINE (loop_loc));
716 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
718 loop->nb_iterations = niters;
721 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
722 For all PHI arguments in FROM->dest and TO->dest from those
723 edges ensure that TO->dest PHI arguments have current_def
724 to that in from. */
726 static void
727 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
729 gimple_stmt_iterator gsi_from, gsi_to;
731 for (gsi_from = gsi_start_phis (from->dest),
732 gsi_to = gsi_start_phis (to->dest);
733 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
735 gimple *from_phi = gsi_stmt (gsi_from);
736 gimple *to_phi = gsi_stmt (gsi_to);
737 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
738 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
739 if (virtual_operand_p (from_arg))
741 gsi_next (&gsi_from);
742 continue;
744 if (virtual_operand_p (to_arg))
746 gsi_next (&gsi_to);
747 continue;
749 if (TREE_CODE (from_arg) != SSA_NAME)
750 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
751 else
753 if (get_current_def (to_arg) == NULL_TREE)
754 set_current_def (to_arg, get_current_def (from_arg));
756 gsi_next (&gsi_from);
757 gsi_next (&gsi_to);
760 gphi *from_phi = get_virtual_phi (from->dest);
761 gphi *to_phi = get_virtual_phi (to->dest);
762 if (from_phi)
763 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
764 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
768 /* Given LOOP this function generates a new copy of it and puts it
769 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
770 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
771 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
772 entry or exit of LOOP. */
774 struct loop *
775 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
776 struct loop *scalar_loop, edge e)
778 struct loop *new_loop;
779 basic_block *new_bbs, *bbs;
780 bool at_exit;
781 bool was_imm_dom;
782 basic_block exit_dest;
783 edge exit, new_exit;
784 bool duplicate_outer_loop = false;
786 exit = single_exit (loop);
787 at_exit = (e == exit);
788 if (!at_exit && e != loop_preheader_edge (loop))
789 return NULL;
791 if (scalar_loop == NULL)
792 scalar_loop = loop;
794 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
795 get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
796 /* Allow duplication of outer loops. */
797 if (scalar_loop->inner)
798 duplicate_outer_loop = true;
799 /* Check whether duplication is possible. */
800 if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
802 free (bbs);
803 return NULL;
806 /* Generate new loop structure. */
807 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
808 duplicate_subloops (scalar_loop, new_loop);
810 exit_dest = exit->dest;
811 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
812 exit_dest) == loop->header ?
813 true : false);
815 /* Also copy the pre-header, this avoids jumping through hoops to
816 duplicate the loop entry PHI arguments. Create an empty
817 pre-header unconditionally for this. */
818 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
819 edge entry_e = single_pred_edge (preheader);
820 bbs[scalar_loop->num_nodes] = preheader;
821 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
823 exit = single_exit (scalar_loop);
824 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
825 &exit, 1, &new_exit, NULL,
826 e->src, true);
827 exit = single_exit (loop);
828 basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
830 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
832 if (scalar_loop != loop)
834 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
835 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
836 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
837 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
838 header) to have current_def set, so copy them over. */
839 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
840 exit);
841 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
843 EDGE_SUCC (loop->latch, 0));
846 if (at_exit) /* Add the loop copy at exit. */
848 if (scalar_loop != loop)
850 gphi_iterator gsi;
851 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
853 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
854 gsi_next (&gsi))
856 gphi *phi = gsi.phi ();
857 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
858 location_t orig_locus
859 = gimple_phi_arg_location_from_edge (phi, e);
861 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
864 redirect_edge_and_branch_force (e, new_preheader);
865 flush_pending_stmts (e);
866 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
867 if (was_imm_dom || duplicate_outer_loop)
868 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
870 /* And remove the non-necessary forwarder again. Keep the other
871 one so we have a proper pre-header for the loop at the exit edge. */
872 redirect_edge_pred (single_succ_edge (preheader),
873 single_pred (preheader));
874 delete_basic_block (preheader);
875 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
876 loop_preheader_edge (scalar_loop)->src);
878 else /* Add the copy at entry. */
880 if (scalar_loop != loop)
882 /* Remove the non-necessary forwarder of scalar_loop again. */
883 redirect_edge_pred (single_succ_edge (preheader),
884 single_pred (preheader));
885 delete_basic_block (preheader);
886 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
887 loop_preheader_edge (scalar_loop)->src);
888 preheader = split_edge (loop_preheader_edge (loop));
889 entry_e = single_pred_edge (preheader);
892 redirect_edge_and_branch_force (entry_e, new_preheader);
893 flush_pending_stmts (entry_e);
894 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
896 redirect_edge_and_branch_force (new_exit, preheader);
897 flush_pending_stmts (new_exit);
898 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
900 /* And remove the non-necessary forwarder again. Keep the other
901 one so we have a proper pre-header for the loop at the exit edge. */
902 redirect_edge_pred (single_succ_edge (new_preheader),
903 single_pred (new_preheader));
904 delete_basic_block (new_preheader);
905 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
906 loop_preheader_edge (new_loop)->src);
909 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
910 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
912 if (scalar_loop != loop)
914 /* Update new_loop->header PHIs, so that on the preheader
915 edge they are the ones from loop rather than scalar_loop. */
916 gphi_iterator gsi_orig, gsi_new;
917 edge orig_e = loop_preheader_edge (loop);
918 edge new_e = loop_preheader_edge (new_loop);
920 for (gsi_orig = gsi_start_phis (loop->header),
921 gsi_new = gsi_start_phis (new_loop->header);
922 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
923 gsi_next (&gsi_orig), gsi_next (&gsi_new))
925 gphi *orig_phi = gsi_orig.phi ();
926 gphi *new_phi = gsi_new.phi ();
927 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
928 location_t orig_locus
929 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
931 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
935 free (new_bbs);
936 free (bbs);
938 checking_verify_dominators (CDI_DOMINATORS);
940 return new_loop;
944 /* Given the condition statement COND, put it as the last statement
945 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
946 Assumes that this is the single exit of the guarded loop.
947 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
949 static edge
950 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
951 gimple_seq cond_expr_stmt_list,
952 basic_block exit_bb, basic_block dom_bb,
953 int probability)
955 gimple_stmt_iterator gsi;
956 edge new_e, enter_e;
957 gcond *cond_stmt;
958 gimple_seq gimplify_stmt_list = NULL;
960 enter_e = EDGE_SUCC (guard_bb, 0);
961 enter_e->flags &= ~EDGE_FALLTHRU;
962 enter_e->flags |= EDGE_FALSE_VALUE;
963 gsi = gsi_last_bb (guard_bb);
965 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
966 NULL_TREE);
967 if (gimplify_stmt_list)
968 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
969 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
970 if (cond_expr_stmt_list)
971 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
973 gsi = gsi_last_bb (guard_bb);
974 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
976 /* Add new edge to connect guard block to the merge/loop-exit block. */
977 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
979 new_e->count = guard_bb->count;
980 new_e->probability = probability;
981 new_e->count = apply_probability (enter_e->count, probability);
982 enter_e->count -= new_e->count;
983 enter_e->probability = inverse_probability (probability);
984 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
985 return new_e;
989 /* This function verifies that the following restrictions apply to LOOP:
990 (1) it consists of exactly 2 basic blocks - header, and an empty latch
991 for innermost loop and 5 basic blocks for outer-loop.
992 (2) it is single entry, single exit
993 (3) its exit condition is the last stmt in the header
994 (4) E is the entry/exit edge of LOOP.
997 bool
998 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
1000 edge exit_e = single_exit (loop);
1001 edge entry_e = loop_preheader_edge (loop);
1002 gcond *orig_cond = get_loop_exit_condition (loop);
1003 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
1004 unsigned int num_bb = loop->inner? 5 : 2;
1006 /* All loops have an outer scope; the only case loop->outer is NULL is for
1007 the function itself. */
1008 if (!loop_outer (loop)
1009 || loop->num_nodes != num_bb
1010 || !empty_block_p (loop->latch)
1011 || !single_exit (loop)
1012 /* Verify that new loop exit condition can be trivially modified. */
1013 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1014 || (e != exit_e && e != entry_e))
1015 return false;
1017 return true;
1020 static void
1021 slpeel_checking_verify_cfg_after_peeling (struct loop *first_loop,
1022 struct loop *second_loop)
1024 if (!flag_checking)
1025 return;
1027 basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1028 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1029 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1031 /* A guard that controls whether the second_loop is to be executed or skipped
1032 is placed in first_loop->exit. first_loop->exit therefore has two
1033 successors - one is the preheader of second_loop, and the other is a bb
1034 after second_loop.
1036 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1038 /* 1. Verify that one of the successors of first_loop->exit is the preheader
1039 of second_loop. */
1041 /* The preheader of new_loop is expected to have two predecessors:
1042 first_loop->exit and the block that precedes first_loop. */
1044 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1045 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1046 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1047 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1048 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1050 /* Verify that the other successor of first_loop->exit is after the
1051 second_loop. */
1052 /* TODO */
1055 /* If the run time cost model check determines that vectorization is
1056 not profitable and hence scalar loop should be generated then set
1057 FIRST_NITERS to prologue peeled iterations. This will allow all the
1058 iterations to be executed in the prologue peeled scalar loop. */
1060 static void
1061 set_prologue_iterations (basic_block bb_before_first_loop,
1062 tree *first_niters,
1063 struct loop *loop,
1064 unsigned int th,
1065 int probability)
1067 edge e;
1068 basic_block cond_bb, then_bb;
1069 tree var, prologue_after_cost_adjust_name;
1070 gimple_stmt_iterator gsi;
1071 gphi *newphi;
1072 edge e_true, e_false, e_fallthru;
1073 gcond *cond_stmt;
1074 gimple_seq stmts = NULL;
1075 tree cost_pre_condition = NULL_TREE;
1076 tree scalar_loop_iters =
1077 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1079 e = single_pred_edge (bb_before_first_loop);
1080 cond_bb = split_edge (e);
1082 e = single_pred_edge (bb_before_first_loop);
1083 then_bb = split_edge (e);
1084 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1086 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1087 EDGE_FALSE_VALUE);
1088 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1090 e_true = EDGE_PRED (then_bb, 0);
1091 e_true->flags &= ~EDGE_FALLTHRU;
1092 e_true->flags |= EDGE_TRUE_VALUE;
1094 e_true->probability = probability;
1095 e_false->probability = inverse_probability (probability);
1096 e_true->count = apply_probability (cond_bb->count, probability);
1097 e_false->count = cond_bb->count - e_true->count;
1098 then_bb->frequency = EDGE_FREQUENCY (e_true);
1099 then_bb->count = e_true->count;
1101 e_fallthru = EDGE_SUCC (then_bb, 0);
1102 e_fallthru->count = then_bb->count;
1104 gsi = gsi_last_bb (cond_bb);
1105 cost_pre_condition =
1106 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1107 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1108 cost_pre_condition =
1109 force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1110 NULL_TREE, false, GSI_CONTINUE_LINKING);
1111 cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1112 NULL_TREE, NULL_TREE);
1113 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1115 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1116 "prologue_after_cost_adjust");
1117 prologue_after_cost_adjust_name =
1118 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1120 gsi = gsi_last_bb (then_bb);
1121 if (stmts)
1122 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1124 newphi = create_phi_node (var, bb_before_first_loop);
1125 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1126 UNKNOWN_LOCATION);
1127 add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1129 *first_niters = PHI_RESULT (newphi);
1132 /* Function slpeel_tree_peel_loop_to_edge.
1134 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1135 that is placed on the entry (exit) edge E of LOOP. After this transformation
1136 we have two loops one after the other - first-loop iterates FIRST_NITERS
1137 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1138 If the cost model indicates that it is profitable to emit a scalar
1139 loop instead of the vector one, then the prolog (epilog) loop will iterate
1140 for the entire unchanged scalar iterations of the loop.
1142 Input:
1143 - LOOP: the loop to be peeled.
1144 - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1145 should be copied.
1146 - E: the exit or entry edge of LOOP.
1147 If it is the entry edge, we peel the first iterations of LOOP. In this
1148 case first-loop is LOOP, and second-loop is the newly created loop.
1149 If it is the exit edge, we peel the last iterations of LOOP. In this
1150 case, first-loop is the newly created loop, and second-loop is LOOP.
1151 - NITERS: the number of iterations that LOOP iterates.
1152 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1153 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1154 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1155 is false, the caller of this function may want to take care of this
1156 (this can be useful if we don't want new stmts added to first-loop).
1157 - TH: cost model profitability threshold of iterations for vectorization.
1158 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1159 during versioning and hence needs to occur during
1160 prologue generation or whether cost model check
1161 has not occurred during prologue generation and hence
1162 needs to occur during epilogue generation.
1163 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1164 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1167 Output:
1168 The function returns a pointer to the new loop-copy, or NULL if it failed
1169 to perform the transformation.
1171 The function generates two if-then-else guards: one before the first loop,
1172 and the other before the second loop:
1173 The first guard is:
1174 if (FIRST_NITERS == 0) then skip the first loop,
1175 and go directly to the second loop.
1176 The second guard is:
1177 if (FIRST_NITERS == NITERS) then skip the second loop.
1179 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1180 then the generated condition is combined with COND_EXPR and the
1181 statements in COND_EXPR_STMT_LIST are emitted together with it.
1183 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1184 FORNOW the resulting code will not be in loop-closed-ssa form.
1187 static struct loop *
1188 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1189 edge e, tree *first_niters,
1190 tree niters, bool update_first_loop_count,
1191 unsigned int th, bool check_profitability,
1192 tree cond_expr, gimple_seq cond_expr_stmt_list,
1193 int bound1, int bound2)
1195 struct loop *new_loop = NULL, *first_loop, *second_loop;
1196 edge skip_e;
1197 tree pre_condition = NULL_TREE;
1198 basic_block bb_before_second_loop, bb_after_second_loop;
1199 basic_block bb_before_first_loop;
1200 basic_block bb_between_loops;
1201 basic_block new_exit_bb;
1202 gphi_iterator gsi;
1203 edge exit_e = single_exit (loop);
1204 source_location loop_loc;
1205 /* There are many aspects to how likely the first loop is going to be executed.
1206 Without histogram we can't really do good job. Simply set it to
1207 2/3, so the first loop is not reordered to the end of function and
1208 the hot path through stays short. */
1209 int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1210 int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1211 int probability_of_second_loop;
1213 if (!slpeel_can_duplicate_loop_p (loop, e))
1214 return NULL;
1216 /* We might have a queued need to update virtual SSA form. As we
1217 delete the update SSA machinery below after doing a regular
1218 incremental SSA update during loop copying make sure we don't
1219 lose that fact.
1220 ??? Needing to update virtual SSA form by renaming is unfortunate
1221 but not all of the vectorizer code inserting new loads / stores
1222 properly assigns virtual operands to those statements. */
1223 update_ssa (TODO_update_ssa_only_virtuals);
1225 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1226 in the exit bb and rename all the uses after the loop. This simplifies
1227 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1228 (but normally loop closed SSA form doesn't require virtual PHIs to be
1229 in the same form). Doing this early simplifies the checking what
1230 uses should be renamed. */
1231 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1232 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1234 gphi *phi = gsi.phi ();
1235 for (gsi = gsi_start_phis (exit_e->dest);
1236 !gsi_end_p (gsi); gsi_next (&gsi))
1237 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1238 break;
1239 if (gsi_end_p (gsi))
1241 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1242 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
1243 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1244 imm_use_iterator imm_iter;
1245 gimple *stmt;
1246 use_operand_p use_p;
1248 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1249 gimple_phi_set_result (new_phi, new_vop);
1250 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1251 if (stmt != new_phi
1252 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1253 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1254 SET_USE (use_p, new_vop);
1256 break;
1259 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1260 Resulting CFG would be:
1262 first_loop:
1263 do {
1264 } while ...
1266 second_loop:
1267 do {
1268 } while ...
1270 orig_exit_bb:
1273 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1274 e)))
1276 loop_loc = find_loop_location (loop);
1277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1278 "tree_duplicate_loop_to_edge_cfg failed.\n");
1279 return NULL;
1282 if (MAY_HAVE_DEBUG_STMTS)
1284 gcc_assert (!adjust_vec.exists ());
1285 adjust_vec.create (32);
1288 if (e == exit_e)
1290 /* NEW_LOOP was placed after LOOP. */
1291 first_loop = loop;
1292 second_loop = new_loop;
1294 else
1296 /* NEW_LOOP was placed before LOOP. */
1297 first_loop = new_loop;
1298 second_loop = loop;
1301 /* 2. Add the guard code in one of the following ways:
1303 2.a Add the guard that controls whether the first loop is executed.
1304 This occurs when this function is invoked for prologue or epilogue
1305 generation and when the cost model check can be done at compile time.
1307 Resulting CFG would be:
1309 bb_before_first_loop:
1310 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1311 GOTO first-loop
1313 first_loop:
1314 do {
1315 } while ...
1317 bb_before_second_loop:
1319 second_loop:
1320 do {
1321 } while ...
1323 orig_exit_bb:
1325 2.b Add the cost model check that allows the prologue
1326 to iterate for the entire unchanged scalar
1327 iterations of the loop in the event that the cost
1328 model indicates that the scalar loop is more
1329 profitable than the vector one. This occurs when
1330 this function is invoked for prologue generation
1331 and the cost model check needs to be done at run
1332 time.
1334 Resulting CFG after prologue peeling would be:
1336 if (scalar_loop_iterations <= th)
1337 FIRST_NITERS = scalar_loop_iterations
1339 bb_before_first_loop:
1340 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1341 GOTO first-loop
1343 first_loop:
1344 do {
1345 } while ...
1347 bb_before_second_loop:
1349 second_loop:
1350 do {
1351 } while ...
1353 orig_exit_bb:
1355 2.c Add the cost model check that allows the epilogue
1356 to iterate for the entire unchanged scalar
1357 iterations of the loop in the event that the cost
1358 model indicates that the scalar loop is more
1359 profitable than the vector one. This occurs when
1360 this function is invoked for epilogue generation
1361 and the cost model check needs to be done at run
1362 time. This check is combined with any pre-existing
1363 check in COND_EXPR to avoid versioning.
1365 Resulting CFG after prologue peeling would be:
1367 bb_before_first_loop:
1368 if ((scalar_loop_iterations <= th)
1370 FIRST_NITERS == 0) GOTO bb_before_second_loop
1371 GOTO first-loop
1373 first_loop:
1374 do {
1375 } while ...
1377 bb_before_second_loop:
1379 second_loop:
1380 do {
1381 } while ...
1383 orig_exit_bb:
1386 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1387 /* Loop copying insterted a forwarder block for us here. */
1388 bb_before_second_loop = single_exit (first_loop)->dest;
1390 probability_of_second_loop = (inverse_probability (first_guard_probability)
1391 + combine_probabilities (second_guard_probability,
1392 first_guard_probability));
1393 /* Theoretically preheader edge of first loop and exit edge should have
1394 same frequencies. Loop exit probablities are however easy to get wrong.
1395 It is safer to copy value from original loop entry. */
1396 bb_before_second_loop->frequency
1397 = combine_probabilities (bb_before_first_loop->frequency,
1398 probability_of_second_loop);
1399 bb_before_second_loop->count
1400 = apply_probability (bb_before_first_loop->count,
1401 probability_of_second_loop);
1402 single_succ_edge (bb_before_second_loop)->count
1403 = bb_before_second_loop->count;
1405 /* Epilogue peeling. */
1406 if (!update_first_loop_count)
1408 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1409 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1410 unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1411 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1412 limit = limit + 1;
1413 if (check_profitability
1414 && th > limit)
1415 limit = th;
1416 pre_condition =
1417 fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1418 build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1419 if (cond_expr)
1421 pre_condition =
1422 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1423 pre_condition,
1424 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1425 cond_expr));
1429 /* Prologue peeling. */
1430 else
1432 if (check_profitability)
1433 set_prologue_iterations (bb_before_first_loop, first_niters,
1434 loop, th, first_guard_probability);
1436 pre_condition =
1437 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1438 build_int_cst (TREE_TYPE (*first_niters), 0));
1441 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1442 cond_expr_stmt_list,
1443 bb_before_second_loop, bb_before_first_loop,
1444 inverse_probability (first_guard_probability));
1445 scale_loop_profile (first_loop, first_guard_probability,
1446 check_profitability && (int)th > bound1 ? th : bound1);
1447 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1448 first_loop == new_loop,
1449 &new_exit_bb);
1452 /* 3. Add the guard that controls whether the second loop is executed.
1453 Resulting CFG would be:
1455 bb_before_first_loop:
1456 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1457 GOTO first-loop
1459 first_loop:
1460 do {
1461 } while ...
1463 bb_between_loops:
1464 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1465 GOTO bb_before_second_loop
1467 bb_before_second_loop:
1469 second_loop:
1470 do {
1471 } while ...
1473 bb_after_second_loop:
1475 orig_exit_bb:
1478 bb_between_loops = new_exit_bb;
1479 bb_after_second_loop = split_edge (single_exit (second_loop));
1481 pre_condition =
1482 fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1483 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1484 bb_after_second_loop, bb_before_first_loop,
1485 inverse_probability (second_guard_probability));
1486 scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1487 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1488 second_loop == new_loop, &new_exit_bb);
1490 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1492 if (update_first_loop_count)
1493 slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1495 delete_update_ssa ();
1497 adjust_vec_debug_stmts ();
1499 return new_loop;
1502 /* Function vect_get_loop_location.
1504 Extract the location of the loop in the source code.
1505 If the loop is not well formed for vectorization, an estimated
1506 location is calculated.
1507 Return the loop location if succeed and NULL if not. */
1509 source_location
1510 find_loop_location (struct loop *loop)
1512 gimple *stmt = NULL;
1513 basic_block bb;
1514 gimple_stmt_iterator si;
1516 if (!loop)
1517 return UNKNOWN_LOCATION;
1519 stmt = get_loop_exit_condition (loop);
1521 if (stmt
1522 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1523 return gimple_location (stmt);
1525 /* If we got here the loop is probably not "well formed",
1526 try to estimate the loop location */
1528 if (!loop->header)
1529 return UNKNOWN_LOCATION;
1531 bb = loop->header;
1533 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1535 stmt = gsi_stmt (si);
1536 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1537 return gimple_location (stmt);
1540 return UNKNOWN_LOCATION;
1544 /* Function vect_can_advance_ivs_p
1546 In case the number of iterations that LOOP iterates is unknown at compile
1547 time, an epilog loop will be generated, and the loop induction variables
1548 (IVs) will be "advanced" to the value they are supposed to take just before
1549 the epilog loop. Here we check that the access function of the loop IVs
1550 and the expression that represents the loop bound are simple enough.
1551 These restrictions will be relaxed in the future. */
1553 bool
1554 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1556 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1557 basic_block bb = loop->header;
1558 gimple *phi;
1559 gphi_iterator gsi;
1561 /* Analyze phi functions of the loop header. */
1563 if (dump_enabled_p ())
1564 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1565 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1567 tree evolution_part;
1569 phi = gsi.phi ();
1570 if (dump_enabled_p ())
1572 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1573 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1576 /* Skip virtual phi's. The data dependences that are associated with
1577 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1579 if (virtual_operand_p (PHI_RESULT (phi)))
1581 if (dump_enabled_p ())
1582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1583 "virtual phi. skip.\n");
1584 continue;
1587 /* Skip reduction phis. */
1589 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1591 if (dump_enabled_p ())
1592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1593 "reduc phi. skip.\n");
1594 continue;
1597 /* Analyze the evolution function. */
1599 evolution_part
1600 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1601 if (evolution_part == NULL_TREE)
1603 if (dump_enabled_p ())
1604 dump_printf (MSG_MISSED_OPTIMIZATION,
1605 "No access function or evolution.\n");
1606 return false;
1609 /* FORNOW: We do not transform initial conditions of IVs
1610 which evolution functions are not invariants in the loop. */
1612 if (!expr_invariant_in_loop_p (loop, evolution_part))
1614 if (dump_enabled_p ())
1615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1616 "evolution not invariant in loop.\n");
1617 return false;
1620 /* FORNOW: We do not transform initial conditions of IVs
1621 which evolution functions are a polynomial of degree >= 2. */
1623 if (tree_is_chrec (evolution_part))
1625 if (dump_enabled_p ())
1626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1627 "evolution is chrec.\n");
1628 return false;
1632 return true;
1636 /* Function vect_update_ivs_after_vectorizer.
1638 "Advance" the induction variables of LOOP to the value they should take
1639 after the execution of LOOP. This is currently necessary because the
1640 vectorizer does not handle induction variables that are used after the
1641 loop. Such a situation occurs when the last iterations of LOOP are
1642 peeled, because:
1643 1. We introduced new uses after LOOP for IVs that were not originally used
1644 after LOOP: the IVs of LOOP are now used by an epilog loop.
1645 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1646 times, whereas the loop IVs should be bumped N times.
1648 Input:
1649 - LOOP - a loop that is going to be vectorized. The last few iterations
1650 of LOOP were peeled.
1651 - NITERS - the number of iterations that LOOP executes (before it is
1652 vectorized). i.e, the number of times the ivs should be bumped.
1653 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1654 coming out from LOOP on which there are uses of the LOOP ivs
1655 (this is the path from LOOP->exit to epilog_loop->preheader).
1657 The new definitions of the ivs are placed in LOOP->exit.
1658 The phi args associated with the edge UPDATE_E in the bb
1659 UPDATE_E->dest are updated accordingly.
1661 Assumption 1: Like the rest of the vectorizer, this function assumes
1662 a single loop exit that has a single predecessor.
1664 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1665 organized in the same order.
1667 Assumption 3: The access function of the ivs is simple enough (see
1668 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1670 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1671 coming out of LOOP on which the ivs of LOOP are used (this is the path
1672 that leads to the epilog loop; other paths skip the epilog loop). This
1673 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1674 needs to have its phis updated.
1677 static void
1678 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1679 edge update_e)
1681 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1682 basic_block exit_bb = single_exit (loop)->dest;
1683 gphi *phi, *phi1;
1684 gphi_iterator gsi, gsi1;
1685 basic_block update_bb = update_e->dest;
1687 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1689 /* Make sure there exists a single-predecessor exit bb: */
1690 gcc_assert (single_pred_p (exit_bb));
1692 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1693 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1694 gsi_next (&gsi), gsi_next (&gsi1))
1696 tree init_expr;
1697 tree step_expr, off;
1698 tree type;
1699 tree var, ni, ni_name;
1700 gimple_stmt_iterator last_gsi;
1701 stmt_vec_info stmt_info;
1703 phi = gsi.phi ();
1704 phi1 = gsi1.phi ();
1705 if (dump_enabled_p ())
1707 dump_printf_loc (MSG_NOTE, vect_location,
1708 "vect_update_ivs_after_vectorizer: phi: ");
1709 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1712 /* Skip virtual phi's. */
1713 if (virtual_operand_p (PHI_RESULT (phi)))
1715 if (dump_enabled_p ())
1716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1717 "virtual phi. skip.\n");
1718 continue;
1721 /* Skip reduction phis. */
1722 stmt_info = vinfo_for_stmt (phi);
1723 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1724 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1726 if (dump_enabled_p ())
1727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1728 "reduc phi. skip.\n");
1729 continue;
1732 type = TREE_TYPE (gimple_phi_result (phi));
1733 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1734 step_expr = unshare_expr (step_expr);
1736 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1737 of degree >= 2 or exponential. */
1738 gcc_assert (!tree_is_chrec (step_expr));
1740 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1742 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1743 fold_convert (TREE_TYPE (step_expr), niters),
1744 step_expr);
1745 if (POINTER_TYPE_P (type))
1746 ni = fold_build_pointer_plus (init_expr, off);
1747 else
1748 ni = fold_build2 (PLUS_EXPR, type,
1749 init_expr, fold_convert (type, off));
1751 var = create_tmp_var (type, "tmp");
1753 last_gsi = gsi_last_bb (exit_bb);
1754 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1755 true, GSI_SAME_STMT);
1757 /* Fix phi expressions in the successor bb. */
1758 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1762 /* Function vect_do_peeling_for_loop_bound
1764 Peel the last iterations of the loop represented by LOOP_VINFO.
1765 The peeled iterations form a new epilog loop. Given that the loop now
1766 iterates NITERS times, the new epilog loop iterates
1767 NITERS % VECTORIZATION_FACTOR times.
1769 If CHECK_PROFITABILITY is 1 then profitability check is generated
1770 using TH as a cost model profitability threshold of iterations for
1771 vectorization.
1773 The original loop will later be made to iterate
1774 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1776 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1777 test. */
1779 void
1780 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1781 tree ni_name, tree ratio_mult_vf_name,
1782 unsigned int th, bool check_profitability)
1784 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1785 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1786 struct loop *new_loop;
1787 edge update_e;
1788 basic_block preheader;
1789 int loop_num;
1790 int max_iter;
1791 tree cond_expr = NULL_TREE;
1792 gimple_seq cond_expr_stmt_list = NULL;
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_NOTE, vect_location,
1796 "=== vect_do_peeling_for_loop_bound ===\n");
1798 initialize_original_copy_tables ();
1800 loop_num = loop->num;
1802 new_loop
1803 = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1804 &ratio_mult_vf_name, ni_name, false,
1805 th, check_profitability,
1806 cond_expr, cond_expr_stmt_list,
1807 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1808 gcc_assert (new_loop);
1809 gcc_assert (loop_num == loop->num);
1810 slpeel_checking_verify_cfg_after_peeling (loop, new_loop);
1812 /* A guard that controls whether the new_loop is to be executed or skipped
1813 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1814 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1815 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1816 is on the path where the LOOP IVs are used and need to be updated. */
1818 preheader = loop_preheader_edge (new_loop)->src;
1819 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1820 update_e = EDGE_PRED (preheader, 0);
1821 else
1822 update_e = EDGE_PRED (preheader, 1);
1824 /* Update IVs of original loop as if they were advanced
1825 by ratio_mult_vf_name steps. */
1826 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1828 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1829 and this means N-2 loopback edge executions.
1831 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1832 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1833 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1834 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1835 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1836 if (check_profitability)
1837 max_iter = MAX (max_iter, (int) th - 1);
1838 record_niter_bound (new_loop, max_iter, false, true);
1839 dump_printf (MSG_NOTE,
1840 "Setting upper bound of nb iterations for epilogue "
1841 "loop to %d\n", max_iter);
1843 /* After peeling we have to reset scalar evolution analyzer. */
1844 scev_reset ();
1846 free_original_copy_tables ();
1850 /* Function vect_gen_niters_for_prolog_loop
1852 Set the number of iterations for the loop represented by LOOP_VINFO
1853 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1854 and the misalignment of DR - the data reference recorded in
1855 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1856 this loop, the data reference DR will refer to an aligned location.
1858 The following computation is generated:
1860 If the misalignment of DR is known at compile time:
1861 addr_mis = int mis = DR_MISALIGNMENT (dr);
1862 Else, compute address misalignment in bytes:
1863 addr_mis = addr & (vectype_align - 1)
1865 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1867 (elem_size = element type size; an element is the scalar element whose type
1868 is the inner type of the vectype)
1870 When the step of the data-ref in the loop is not 1 (as in interleaved data
1871 and SLP), the number of iterations of the prolog must be divided by the step
1872 (which is equal to the size of interleaved group).
1874 The above formulas assume that VF == number of elements in the vector. This
1875 may not hold when there are multiple-types in the loop.
1876 In this case, for some data-references in the loop the VF does not represent
1877 the number of elements that fit in the vector. Therefore, instead of VF we
1878 use TYPE_VECTOR_SUBPARTS. */
1880 static tree
1881 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1883 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1884 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1885 tree var;
1886 gimple_seq stmts;
1887 tree iters, iters_name;
1888 edge pe;
1889 basic_block new_bb;
1890 gimple *dr_stmt = DR_STMT (dr);
1891 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1892 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1893 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1894 tree niters_type = TREE_TYPE (loop_niters);
1895 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1897 pe = loop_preheader_edge (loop);
1899 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1901 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1903 if (dump_enabled_p ())
1904 dump_printf_loc (MSG_NOTE, vect_location,
1905 "known peeling = %d.\n", npeel);
1907 iters = build_int_cst (niters_type, npeel);
1908 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1910 else
1912 gimple_seq new_stmts = NULL;
1913 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1914 tree offset = negative
1915 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1916 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1917 &new_stmts, offset, loop);
1918 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1919 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1920 HOST_WIDE_INT elem_size =
1921 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1922 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1923 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1924 tree nelements_tree = build_int_cst (type, nelements);
1925 tree byte_misalign;
1926 tree elem_misalign;
1928 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1929 gcc_assert (!new_bb);
1931 /* Create: byte_misalign = addr & (vectype_align - 1) */
1932 byte_misalign =
1933 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1934 vectype_align_minus_1);
1936 /* Create: elem_misalign = byte_misalign / element_size */
1937 elem_misalign =
1938 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1940 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
1941 if (negative)
1942 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1943 else
1944 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1945 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1946 iters = fold_convert (niters_type, iters);
1947 *bound = nelements;
1950 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1951 /* If the loop bound is known at compile time we already verified that it is
1952 greater than vf; since the misalignment ('iters') is at most vf, there's
1953 no need to generate the MIN_EXPR in this case. */
1954 if (TREE_CODE (loop_niters) != INTEGER_CST)
1955 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1957 if (dump_enabled_p ())
1959 dump_printf_loc (MSG_NOTE, vect_location,
1960 "niters for prolog loop: ");
1961 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1962 dump_printf (MSG_NOTE, "\n");
1965 var = create_tmp_var (niters_type, "prolog_loop_niters");
1966 stmts = NULL;
1967 iters_name = force_gimple_operand (iters, &stmts, false, var);
1969 /* Insert stmt on loop preheader edge. */
1970 if (stmts)
1972 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1973 gcc_assert (!new_bb);
1976 return iters_name;
1980 /* Function vect_update_init_of_dr
1982 NITERS iterations were peeled from LOOP. DR represents a data reference
1983 in LOOP. This function updates the information recorded in DR to
1984 account for the fact that the first NITERS iterations had already been
1985 executed. Specifically, it updates the OFFSET field of DR. */
1987 static void
1988 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1990 tree offset = DR_OFFSET (dr);
1992 niters = fold_build2 (MULT_EXPR, sizetype,
1993 fold_convert (sizetype, niters),
1994 fold_convert (sizetype, DR_STEP (dr)));
1995 offset = fold_build2 (PLUS_EXPR, sizetype,
1996 fold_convert (sizetype, offset), niters);
1997 DR_OFFSET (dr) = offset;
2001 /* Function vect_update_inits_of_drs
2003 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2004 This function updates the information recorded for the data references in
2005 the loop to account for the fact that the first NITERS iterations had
2006 already been executed. Specifically, it updates the initial_condition of
2007 the access_function of all the data_references in the loop. */
2009 static void
2010 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2012 unsigned int i;
2013 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2014 struct data_reference *dr;
2016 if (dump_enabled_p ())
2017 dump_printf_loc (MSG_NOTE, vect_location,
2018 "=== vect_update_inits_of_dr ===\n");
2020 FOR_EACH_VEC_ELT (datarefs, i, dr)
2021 vect_update_init_of_dr (dr, niters);
2025 /* Function vect_do_peeling_for_alignment
2027 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2028 'niters' is set to the misalignment of one of the data references in the
2029 loop, thereby forcing it to refer to an aligned location at the beginning
2030 of the execution of this loop. The data reference for which we are
2031 peeling is recorded in LOOP_VINFO_UNALIGNED_DR.
2033 If CHECK_PROFITABILITY is 1 then profitability check is generated
2034 using TH as a cost model profitability threshold of iterations for
2035 vectorization. */
2037 void
2038 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
2039 unsigned int th, bool check_profitability)
2041 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2042 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2043 tree niters_of_prolog_loop;
2044 tree wide_prolog_niters;
2045 struct loop *new_loop;
2046 int max_iter;
2047 int bound = 0;
2049 if (dump_enabled_p ())
2050 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2051 "loop peeled for vectorization to enhance"
2052 " alignment\n");
2054 initialize_original_copy_tables ();
2056 gimple_seq stmts = NULL;
2057 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2058 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2059 ni_name,
2060 &bound);
2062 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2063 new_loop =
2064 slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2065 loop_preheader_edge (loop),
2066 &niters_of_prolog_loop, ni_name, true,
2067 th, check_profitability, NULL_TREE, NULL,
2068 bound, 0);
2070 gcc_assert (new_loop);
2071 slpeel_checking_verify_cfg_after_peeling (new_loop, loop);
2072 /* For vectorization factor N, we need to copy at most N-1 values
2073 for alignment and this means N-2 loopback edge executions. */
2074 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2075 if (check_profitability)
2076 max_iter = MAX (max_iter, (int) th - 1);
2077 record_niter_bound (new_loop, max_iter, false, true);
2078 dump_printf (MSG_NOTE,
2079 "Setting upper bound of nb iterations for prologue "
2080 "loop to %d\n", max_iter);
2082 /* Update number of times loop executes. */
2083 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2084 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2085 LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2086 TREE_TYPE (ni_name),
2087 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2089 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2090 wide_prolog_niters = niters_of_prolog_loop;
2091 else
2093 gimple_seq seq = NULL;
2094 edge pe = loop_preheader_edge (loop);
2095 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2096 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2097 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2098 var);
2099 if (seq)
2101 /* Insert stmt on loop preheader edge. */
2102 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2103 gcc_assert (!new_bb);
2107 /* Update the init conditions of the access functions of all data refs. */
2108 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2110 /* After peeling we have to reset scalar evolution analyzer. */
2111 scev_reset ();
2113 free_original_copy_tables ();
2116 /* Function vect_create_cond_for_niters_checks.
2118 Create a conditional expression that represents the run-time checks for
2119 loop's niter. The loop is guaranteed to to terminate if the run-time
2120 checks hold.
2122 Input:
2123 COND_EXPR - input conditional expression. New conditions will be chained
2124 with logical AND operation. If it is NULL, then the function
2125 is used to return the number of alias checks.
2126 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2127 to be checked.
2129 Output:
2130 COND_EXPR - conditional expression.
2132 The returned COND_EXPR is the conditional expression to be used in the
2133 if statement that controls which version of the loop gets executed at
2134 runtime. */
2136 static void
2137 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2139 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2141 if (*cond_expr)
2142 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2143 *cond_expr, part_cond_expr);
2144 else
2145 *cond_expr = part_cond_expr;
2148 /* Function vect_create_cond_for_align_checks.
2150 Create a conditional expression that represents the alignment checks for
2151 all of data references (array element references) whose alignment must be
2152 checked at runtime.
2154 Input:
2155 COND_EXPR - input conditional expression. New conditions will be chained
2156 with logical AND operation.
2157 LOOP_VINFO - two fields of the loop information are used.
2158 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2159 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2161 Output:
2162 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2163 expression.
2164 The returned value is the conditional expression to be used in the if
2165 statement that controls which version of the loop gets executed at runtime.
2167 The algorithm makes two assumptions:
2168 1) The number of bytes "n" in a vector is a power of 2.
2169 2) An address "a" is aligned if a%n is zero and that this
2170 test can be done as a&(n-1) == 0. For example, for 16
2171 byte vectors the test is a&0xf == 0. */
2173 static void
2174 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2175 tree *cond_expr,
2176 gimple_seq *cond_expr_stmt_list)
2178 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2179 vec<gimple *> may_misalign_stmts
2180 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2181 gimple *ref_stmt;
2182 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2183 tree mask_cst;
2184 unsigned int i;
2185 tree int_ptrsize_type;
2186 char tmp_name[20];
2187 tree or_tmp_name = NULL_TREE;
2188 tree and_tmp_name;
2189 gimple *and_stmt;
2190 tree ptrsize_zero;
2191 tree part_cond_expr;
2193 /* Check that mask is one less than a power of 2, i.e., mask is
2194 all zeros followed by all ones. */
2195 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2197 int_ptrsize_type = signed_type_for (ptr_type_node);
2199 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2200 of the first vector of the i'th data reference. */
2202 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2204 gimple_seq new_stmt_list = NULL;
2205 tree addr_base;
2206 tree addr_tmp_name;
2207 tree new_or_tmp_name;
2208 gimple *addr_stmt, *or_stmt;
2209 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2210 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2211 bool negative = tree_int_cst_compare
2212 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2213 tree offset = negative
2214 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2216 /* create: addr_tmp = (int)(address_of_first_vector) */
2217 addr_base =
2218 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2219 offset, loop);
2220 if (new_stmt_list != NULL)
2221 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2223 sprintf (tmp_name, "addr2int%d", i);
2224 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2225 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2226 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2228 /* The addresses are OR together. */
2230 if (or_tmp_name != NULL_TREE)
2232 /* create: or_tmp = or_tmp | addr_tmp */
2233 sprintf (tmp_name, "orptrs%d", i);
2234 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2235 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2236 or_tmp_name, addr_tmp_name);
2237 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2238 or_tmp_name = new_or_tmp_name;
2240 else
2241 or_tmp_name = addr_tmp_name;
2243 } /* end for i */
2245 mask_cst = build_int_cst (int_ptrsize_type, mask);
2247 /* create: and_tmp = or_tmp & mask */
2248 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2250 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2251 or_tmp_name, mask_cst);
2252 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2254 /* Make and_tmp the left operand of the conditional test against zero.
2255 if and_tmp has a nonzero bit then some address is unaligned. */
2256 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2257 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2258 and_tmp_name, ptrsize_zero);
2259 if (*cond_expr)
2260 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2261 *cond_expr, part_cond_expr);
2262 else
2263 *cond_expr = part_cond_expr;
2266 /* Function vect_create_cond_for_alias_checks.
2268 Create a conditional expression that represents the run-time checks for
2269 overlapping of address ranges represented by a list of data references
2270 relations passed as input.
2272 Input:
2273 COND_EXPR - input conditional expression. New conditions will be chained
2274 with logical AND operation. If it is NULL, then the function
2275 is used to return the number of alias checks.
2276 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2277 to be checked.
2279 Output:
2280 COND_EXPR - conditional expression.
2282 The returned COND_EXPR is the conditional expression to be used in the if
2283 statement that controls which version of the loop gets executed at runtime.
2286 void
2287 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2289 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2290 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2291 tree part_cond_expr;
2293 /* Create expression
2294 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2295 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2299 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2300 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
2302 if (comp_alias_ddrs.is_empty ())
2303 return;
2305 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2307 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2308 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2309 tree segment_length_a = dr_a.seg_len;
2310 tree segment_length_b = dr_b.seg_len;
2311 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2312 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2313 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2315 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2316 offset_a, DR_INIT (dr_a.dr));
2317 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2318 offset_b, DR_INIT (dr_b.dr));
2319 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2320 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2322 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_NOTE, vect_location,
2325 "create runtime check for data references ");
2326 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2327 dump_printf (MSG_NOTE, " and ");
2328 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2329 dump_printf (MSG_NOTE, "\n");
2332 tree seg_a_min = addr_base_a;
2333 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2334 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2335 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2336 [a, a+12) */
2337 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2339 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2340 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2341 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2344 tree seg_b_min = addr_base_b;
2345 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2346 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2348 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2349 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2350 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2353 part_cond_expr =
2354 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2355 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2356 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2358 if (*cond_expr)
2359 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2360 *cond_expr, part_cond_expr);
2361 else
2362 *cond_expr = part_cond_expr;
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_NOTE, vect_location,
2367 "created %u versioning for alias checks.\n",
2368 comp_alias_ddrs.length ());
2372 /* Function vect_loop_versioning.
2374 If the loop has data references that may or may not be aligned or/and
2375 has data reference relations whose independence was not proven then
2376 two versions of the loop need to be generated, one which is vectorized
2377 and one which isn't. A test is then generated to control which of the
2378 loops is executed. The test checks for the alignment of all of the
2379 data references that may or may not be aligned. An additional
2380 sequence of runtime tests is generated for each pairs of DDRs whose
2381 independence was not proven. The vectorized version of loop is
2382 executed only if both alias and alignment tests are passed.
2384 The test generated to check which version of loop is executed
2385 is modified to also check for profitability as indicated by the
2386 cost model threshold TH.
2388 The versioning precondition(s) are placed in *COND_EXPR and
2389 *COND_EXPR_STMT_LIST. */
2391 void
2392 vect_loop_versioning (loop_vec_info loop_vinfo,
2393 unsigned int th, bool check_profitability)
2395 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2396 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2397 basic_block condition_bb;
2398 gphi_iterator gsi;
2399 gimple_stmt_iterator cond_exp_gsi;
2400 basic_block merge_bb;
2401 basic_block new_exit_bb;
2402 edge new_exit_e, e;
2403 gphi *orig_phi, *new_phi;
2404 tree cond_expr = NULL_TREE;
2405 gimple_seq cond_expr_stmt_list = NULL;
2406 tree arg;
2407 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2408 gimple_seq gimplify_stmt_list = NULL;
2409 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2410 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2411 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2412 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2414 if (check_profitability)
2415 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2416 build_int_cst (TREE_TYPE (scalar_loop_iters),
2417 th));
2419 if (version_niter)
2420 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2422 if (cond_expr)
2423 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2424 is_gimple_condexpr, NULL_TREE);
2426 if (version_align)
2427 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2428 &cond_expr_stmt_list);
2430 if (version_alias)
2431 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2433 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2434 is_gimple_condexpr, NULL_TREE);
2435 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2437 initialize_original_copy_tables ();
2438 if (scalar_loop)
2440 edge scalar_e;
2441 basic_block preheader, scalar_preheader;
2443 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2444 scale LOOP's frequencies instead. */
2445 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2446 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2447 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2448 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2449 while we need to move it above LOOP's preheader. */
2450 e = loop_preheader_edge (loop);
2451 scalar_e = loop_preheader_edge (scalar_loop);
2452 gcc_assert (empty_block_p (e->src)
2453 && single_pred_p (e->src));
2454 gcc_assert (empty_block_p (scalar_e->src)
2455 && single_pred_p (scalar_e->src));
2456 gcc_assert (single_pred_p (condition_bb));
2457 preheader = e->src;
2458 scalar_preheader = scalar_e->src;
2459 scalar_e = find_edge (condition_bb, scalar_preheader);
2460 e = single_pred_edge (preheader);
2461 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2462 scalar_preheader);
2463 redirect_edge_and_branch_force (scalar_e, preheader);
2464 redirect_edge_and_branch_force (e, condition_bb);
2465 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2466 single_pred (condition_bb));
2467 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2468 single_pred (scalar_preheader));
2469 set_immediate_dominator (CDI_DOMINATORS, preheader,
2470 condition_bb);
2472 else
2473 nloop = loop_version (loop, cond_expr, &condition_bb,
2474 prob, prob, REG_BR_PROB_BASE - prob, true);
2476 if (version_niter)
2478 /* The versioned loop could be infinite, we need to clear existing
2479 niter information which is copied from the original loop. */
2480 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2481 vect_free_loop_info_assumptions (nloop);
2482 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2483 loop_constraint_set (loop, LOOP_C_INFINITE);
2486 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2487 && dump_enabled_p ())
2489 if (version_alias)
2490 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2491 "loop versioned for vectorization because of "
2492 "possible aliasing\n");
2493 if (version_align)
2494 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2495 "loop versioned for vectorization to enhance "
2496 "alignment\n");
2499 free_original_copy_tables ();
2501 /* Loop versioning violates an assumption we try to maintain during
2502 vectorization - that the loop exit block has a single predecessor.
2503 After versioning, the exit block of both loop versions is the same
2504 basic block (i.e. it has two predecessors). Just in order to simplify
2505 following transformations in the vectorizer, we fix this situation
2506 here by adding a new (empty) block on the exit-edge of the loop,
2507 with the proper loop-exit phis to maintain loop-closed-form.
2508 If loop versioning wasn't done from loop, but scalar_loop instead,
2509 merge_bb will have already just a single successor. */
2511 merge_bb = single_exit (loop)->dest;
2512 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2514 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2515 new_exit_bb = split_edge (single_exit (loop));
2516 new_exit_e = single_exit (loop);
2517 e = EDGE_SUCC (new_exit_bb, 0);
2519 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2521 tree new_res;
2522 orig_phi = gsi.phi ();
2523 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2524 new_phi = create_phi_node (new_res, new_exit_bb);
2525 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2526 add_phi_arg (new_phi, arg, new_exit_e,
2527 gimple_phi_arg_location_from_edge (orig_phi, e));
2528 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2532 /* End loop-exit-fixes after versioning. */
2534 if (cond_expr_stmt_list)
2536 cond_exp_gsi = gsi_last_bb (condition_bb);
2537 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2538 GSI_SAME_STMT);
2540 update_ssa (TODO_update_ssa);