Merge from mainline (165734:167278).
[official-gcc/graphite-test-results.git] / gcc / tree-vect-loop-manip.c
blobeb7eada03c0cd2b2e941d101c6d2e9aa2698adff
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "tree-pretty-print.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "diagnostic-core.h"
37 #include "toplev.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
42 /*************************************************************************
43 Simple Loop Peeling Utilities
45 Utilities to support loop peeling for vectorization purposes.
46 *************************************************************************/
49 /* Renames the use *OP_P. */
51 static void
52 rename_use_op (use_operand_p op_p)
54 tree new_name;
56 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
57 return;
59 new_name = get_current_def (USE_FROM_PTR (op_p));
61 /* Something defined outside of the loop. */
62 if (!new_name)
63 return;
65 /* An ordinary ssa name defined in the loop. */
67 SET_USE (op_p, new_name);
/* Renames the variables in basic block BB: every SSA use in BB's
   statements, and the PHI arguments flowing from BB along edges to
   successor blocks that lie inside the same loop.  */

void
rename_variables_in_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  /* Rename all SSA use operands of every statement in BB.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
	rename_use_op (use_p);
    }

  /* Rename the PHI arguments on BB's successor edges, but only for
     successors inside LOOP; destinations outside the loop are
     deliberately skipped here.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
	continue;
      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
    }
}
101 /* Renames variables in new generated LOOP. */
103 void
104 rename_variables_in_loop (struct loop *loop)
106 unsigned i;
107 basic_block *bbs;
109 bbs = get_loop_body (loop);
111 for (i = 0; i < loop->num_nodes; i++)
112 rename_variables_in_bb (bbs[i]);
114 free (bbs);
/* One pending debug-stmt adjustment: references to FROM that are
   dominated by BB (but not by FROM's definition) should be replaced
   with TO.  See adjust_debug_stmts_now.  */
typedef struct
{
  tree from, to;
  basic_block bb;
} adjust_info;

DEF_VEC_O(adjust_info);
DEF_VEC_ALLOC_O_STACK(adjust_info);
#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static VEC(adjust_info, stack) *adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  If AI->to is NULL, the debug
   binding is reset instead of substituted.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  /* The dominance tests below are meaningless without dominator info.  */
  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      /* Only debug statements are adjusted; real uses are left alone.  */
      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      /* Substitute only uses dominated by BBPHI but not by the
	 original definition's block.  */
      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    {
	      /* No replacement available: drop the binding so the
		 debug stmt does not reference an invalid name.  */
	      gimple_debug_bind_reset_value (stmt);
	      update_stmt (stmt);
	    }
	}
    }
}
/* Adjust debug stmts as scheduled before.  Processes the queued
   adjustments LIFO (see the comment on adjust_vec) and releases the
   stack when done.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec);

  /* Pop entries off the stack so the most recently recorded
     substitution is applied first.  */
  while (!VEC_empty (adjust_info, adjust_vec))
    {
      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
      VEC_pop (adjust_info, adjust_vec);
    }

  VEC_free (adjust_info, stack, adjust_vec);
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  /* Nothing to do without debug stmts, for non-SSA values, or for
     the virtual operand (which debug stmts never bind).  */
  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
      && SSA_NAME_VAR (from) != gimple_vop (cfun))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec)
	VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
      else
	adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
{
  /* Capture the old argument before overwriting it, so the debug
     references to it can still be found and redirected.  */
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
			gimple_bb (update_phi));
}
/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
				       struct loop *new_loop, bool after)
{
  tree new_ssa_name;
  gimple phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = single_exit (new_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  /* When NEW_LOOP runs after ORIG_LOOP its entry value is the value
     leaving ORIG_LOOP's latch; otherwise it is ORIG_LOOP's own entry
     value.  */
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
  gimple_stmt_iterator gsi_new, gsi_orig;

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
            (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
            (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (gsi_new = gsi_start_phis (new_loop->header),
       gsi_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
       gsi_next (&gsi_new), gsi_next (&gsi_orig))
    {
      source_location locus;
      phi_new = gsi_stmt (gsi_new);
      phi_orig = gsi_stmt (gsi_orig);

      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* step 2.  A constant latch value needs no renaming and no
	 latch argument handling; skip to the next phi.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
	continue;

      new_ssa_name = get_current_def (def);
      if (!new_ssa_name)
	{
	  /* This only happens if there are no definitions
	     inside the loop. use the phi_result in this case.  */
	  new_ssa_name = PHI_RESULT (phi_new);
	}

      /* An ordinary ssa name defined in the loop.  */
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);

      /* Drop any debug references outside the loop, if they would
	 become ill-formed SSA.  */
      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);

      /* step 3 (case 1).  */
      if (!after)
	{
	  gcc_assert (new_loop_exit_e == orig_entry_e);
	  adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
	}
    }
}
338 /* Update PHI nodes for a guard of the LOOP.
340 Input:
341 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
342 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
343 originates from the guard-bb, skips LOOP and reaches the (unique) exit
344 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
345 We denote this bb NEW_MERGE_BB because before the guard code was added
346 it had a single predecessor (the LOOP header), and now it became a merge
347 point of two paths - the path that ends with the LOOP exit-edge, and
348 the path that ends with GUARD_EDGE.
349 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
350 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
352 ===> The CFG before the guard-code was added:
353 LOOP_header_bb:
354 loop_body
355 if (exit_loop) goto update_bb
356 else goto LOOP_header_bb
357 update_bb:
359 ==> The CFG after the guard-code was added:
360 guard_bb:
361 if (LOOP_guard_condition) goto new_merge_bb
362 else goto LOOP_header_bb
363 LOOP_header_bb:
364 loop_body
365 if (exit_loop_condition) goto new_merge_bb
366 else goto LOOP_header_bb
367 new_merge_bb:
368 goto update_bb
369 update_bb:
371 ==> The CFG after this function:
372 guard_bb:
373 if (LOOP_guard_condition) goto new_merge_bb
374 else goto LOOP_header_bb
375 LOOP_header_bb:
376 loop_body
377 if (exit_loop_condition) goto new_exit_bb
378 else goto LOOP_header_bb
379 new_exit_bb:
380 new_merge_bb:
381 goto update_bb
382 update_bb:
384 This function:
385 1. creates and updates the relevant phi nodes to account for the new
386 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
387 1.1. Create phi nodes at NEW_MERGE_BB.
388 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
389 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
390 2. preserves loop-closed-ssa-form by creating the required phi nodes
391 at the exit of LOOP (i.e, in NEW_EXIT_BB).
393 There are two flavors to this function:
395 slpeel_update_phi_nodes_for_guard1:
396 Here the guard controls whether we enter or skip LOOP, where LOOP is a
397 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
398 for variables that have phis in the loop header.
400 slpeel_update_phi_nodes_for_guard2:
401 Here the guard controls whether we enter or skip LOOP, where LOOP is an
402 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
403 for variables that have phis in the loop exit.
405 I.E., the overall structure is:
407 loop1_preheader_bb:
408 guard1 (goto loop1/merge1_bb)
409 loop1
410 loop1_exit_bb:
411 guard2 (goto merge1_bb/merge2_bb)
412 merge1_bb
413 loop2
414 loop2_exit_bb
415 merge2_bb
416 next_bb
418 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
419 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
420 that have phis in loop1->header).
422 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
423 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
424 that have phis in next_bb). It also adds some of these phis to
425 loop1_exit_bb.
427 slpeel_update_phi_nodes_for_guard1 is always called before
428 slpeel_update_phi_nodes_for_guard2. They are both needed in order
429 to create correct data-flow and loop-closed-ssa-form.
431 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
432 that change between iterations of a loop (and therefore have a phi-node
433 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
434 phis for variables that are used out of the loop (and therefore have
435 loop-closed exit phis). Some variables may be both updated between
436 iterations and used after the loop. This is why in loop1_exit_bb we
437 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
438 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
440 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
441 an original loop. i.e., we have:
443 orig_loop
444 guard_bb (goto LOOP/new_merge)
445 new_loop <-- LOOP
446 new_exit
447 new_merge
448 next_bb
450 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
451 have:
453 new_loop
454 guard_bb (goto LOOP/new_merge)
455 orig_loop <-- LOOP
456 new_exit
457 new_merge
458 next_bb
460 The SSA names defined in the original loop have a current
461 reaching definition that records the corresponding new
462 ssa-name used in the new duplicated loop copy.
465 /* Function slpeel_update_phi_nodes_for_guard1
467 Input:
468 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
469 - DEFS - a bitmap of ssa names to mark new names for which we recorded
470 information.
472 In the context of the overall structure, we have:
474 loop1_preheader_bb:
475 guard1 (goto loop1/merge1_bb)
476 LOOP-> loop1
477 loop1_exit_bb:
478 guard2 (goto merge1_bb/merge2_bb)
479 merge1_bb
480 loop2
481 loop2_exit_bb
482 merge2_bb
483 next_bb
485 For each name updated between loop iterations (i.e - for each name that has
486 an entry (loop-header) phi in LOOP) we create a new phi in:
487 1. merge1_bb (to account for the edge from guard1)
488 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
491 static void
492 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
493 bool is_new_loop, basic_block *new_exit_bb,
494 bitmap *defs)
496 gimple orig_phi, new_phi;
497 gimple update_phi, update_phi2;
498 tree guard_arg, loop_arg;
499 basic_block new_merge_bb = guard_edge->dest;
500 edge e = EDGE_SUCC (new_merge_bb, 0);
501 basic_block update_bb = e->dest;
502 basic_block orig_bb = loop->header;
503 edge new_exit_e;
504 tree current_new_name;
505 gimple_stmt_iterator gsi_orig, gsi_update;
507 /* Create new bb between loop and new_merge_bb. */
508 *new_exit_bb = split_edge (single_exit (loop));
510 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
512 for (gsi_orig = gsi_start_phis (orig_bb),
513 gsi_update = gsi_start_phis (update_bb);
514 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
515 gsi_next (&gsi_orig), gsi_next (&gsi_update))
517 source_location loop_locus, guard_locus;;
518 orig_phi = gsi_stmt (gsi_orig);
519 update_phi = gsi_stmt (gsi_update);
521 /** 1. Handle new-merge-point phis **/
523 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
524 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
525 new_merge_bb);
527 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
528 of LOOP. Set the two phi args in NEW_PHI for these edges: */
529 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
530 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
531 EDGE_SUCC (loop->latch,
532 0));
533 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
534 guard_locus
535 = gimple_phi_arg_location_from_edge (orig_phi,
536 loop_preheader_edge (loop));
538 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
539 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
541 /* 1.3. Update phi in successor block. */
542 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
543 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
544 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
545 update_phi2 = new_phi;
548 /** 2. Handle loop-closed-ssa-form phis **/
550 if (!is_gimple_reg (PHI_RESULT (orig_phi)))
551 continue;
553 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
554 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
555 *new_exit_bb);
557 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
558 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
560 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
561 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
562 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
563 PHI_RESULT (new_phi));
565 /* 2.4. Record the newly created name with set_current_def.
566 We want to find a name such that
567 name = get_current_def (orig_loop_name)
568 and to set its current definition as follows:
569 set_current_def (name, new_phi_name)
571 If LOOP is a new loop then loop_arg is already the name we're
572 looking for. If LOOP is the original loop, then loop_arg is
573 the orig_loop_name and the relevant name is recorded in its
574 current reaching definition. */
575 if (is_new_loop)
576 current_new_name = loop_arg;
577 else
579 current_new_name = get_current_def (loop_arg);
580 /* current_def is not available only if the variable does not
581 change inside the loop, in which case we also don't care
582 about recording a current_def for it because we won't be
583 trying to create loop-exit-phis for it. */
584 if (!current_new_name)
585 continue;
587 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
589 set_current_def (current_new_name, PHI_RESULT (new_phi));
590 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
595 /* Function slpeel_update_phi_nodes_for_guard2
597 Input:
598 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
600 In the context of the overall structure, we have:
602 loop1_preheader_bb:
603 guard1 (goto loop1/merge1_bb)
604 loop1
605 loop1_exit_bb:
606 guard2 (goto merge1_bb/merge2_bb)
607 merge1_bb
608 LOOP-> loop2
609 loop2_exit_bb
610 merge2_bb
611 next_bb
613 For each name used outside the loop (i.e - for each name that has an exit
614 phi in next_bb) we create a new phi in:
615 1. merge2_bb (to account for the edge from guard_bb)
616 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
617 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
618 if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
				    bool is_new_loop, basic_block *new_exit_bb)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gimple_stmt_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  /* Walk the loop-closed exit phis in UPDATE_BB.  */
  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      update_phi = gsi_stmt (gsi);
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
	 out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
	continue;
      orig_def_new_name = get_current_def (orig_def);
      arg = NULL_TREE;

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				 new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
	    of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
	{
	  new_name = orig_def_new_name;
	  /* Some variables have both loop-entry-phis and loop-exit-phis.
	     Such variables were given yet newer names by phis placed in
	     guard_bb by slpeel_update_phi_nodes_for_guard1.  I.e:
	     new_name2 = get_current_def (get_current_def (orig_name)).  */
	  new_name2 = get_current_def (new_name);
	}

      /* Which name arrives from the loop exit and which from the guard
	 depends on whether LOOP is the new copy or the original.  */
      if (is_new_loop)
	{
	  guard_arg = orig_def;
	  loop_arg = new_name;
	}
      else
	{
	  guard_arg = new_name;
	  loop_arg = orig_def;
	}
      if (new_name2)
	guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;

      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				 *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
				  PHI_RESULT (new_phi));

      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
	 GUARD_BB, i.e. names for which
	 slpeel_update_phi_nodes_for_guard1 had not already created a
	 phi node. This is the case for names that are used outside
	 the loop (and therefore need an exit phi) but are not updated
	 across loop iterations (and therefore don't have a
	 loop-header-phi).

	 slpeel_update_phi_nodes_for_guard1 is responsible for
	 creating loop-exit phis in GUARD_BB for names that have a
	 loop-header-phi.  When such a phi is created we also record
	 the new name in its current definition.  If this new name
	 exists, then guard_arg was set to this new name (see 1.2
	 above).  Therefore, if guard_arg is not this new name, this
	 is an indication that an exit-phi in GUARD_BB was not yet
	 created, so we take care of it here.  */
      if (guard_arg == new_name2)
	continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				 guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
		   UNKNOWN_LOCATION);

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
		  == guard_arg);
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
				  PHI_RESULT (new_phi));
    }
}
/* Make the LOOP iterate NITERS times. This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gimple cond_stmt;
  gimple orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  /* The new IV counts from 0 by 1 in NITERS's type.  */
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  LOC loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
	     &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  /* Gimplify the IV value and NITERS right before the (old) exit test
     so the new test's operands are valid there.  */
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* Pick the comparison so the exit edge keeps its true/false sense.  */
  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
				 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);

  loop_loc = find_loop_location (loop);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (loop_loc != UNKNOWN_LOC)
	fprintf (dump_file, "\nloop at %s:%d: ",
		 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
    }

  /* Record the now-known iteration count on the loop.  */
  loop->nb_iterations = niters;
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  Returns the new
   loop, or NULL if E is not the entry/exit edge or the body cannot
   be copied.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  gimple phi;
  tree phi_arg;
  edge exit, new_exit;
  gimple_stmt_iterator gsi;

  /* E must be either LOOP's single exit or its preheader edge.  */
  at_exit = (e == single_exit (loop));
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  bbs = get_loop_body (loop);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (loop, loop_outer (loop));
  if (!new_loop)
    {
      free (bbs);
      return NULL;
    }

  exit_dest = single_exit (loop)->dest;
  /* Remember whether the exit destination was immediately dominated
     by the loop header, to restore that relation after redirection.  */
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
					  exit_dest) == loop->header ?
		 true : false);

  new_bbs = XNEWVEC (basic_block, loop->num_nodes);

  exit = single_exit (loop);
  copy_bbs (bbs, loop->num_nodes, new_bbs,
	    &exit, 1, &new_exit, NULL,
	    e->src);

  /* Duplicating phi args at exit bbs as coming
     also from exit of duplicated loop.  */
  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
      if (phi_arg)
	{
	  edge new_loop_exit_edge;
	  source_location locus;

	  locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
	  /* The copied header has two successors; the one that is not
	     the latch is the exit edge of the copy.  */
	  if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
	  else
	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);

	  add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
	}
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      redirect_edge_and_branch_force (e, new_loop->header);
      PENDING_STMT (e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
      if (was_imm_dom)
	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
    }
  else /* Add the copy at entry.  */
    {
      edge new_exit_e;
      edge entry_e = loop_preheader_edge (loop);
      basic_block preheader = entry_e->src;

      /* The copy's header successor that leaves the copy is its exit.  */
      if (!flow_bb_inside_loop_p (new_loop,
				  EDGE_SUCC (new_loop->header, 0)->dest))
	new_exit_e = EDGE_SUCC (new_loop->header, 0);
      else
	new_exit_e = EDGE_SUCC (new_loop->header, 1);

      redirect_edge_and_branch_force (new_exit_e, loop->header);
      PENDING_STMT (new_exit_e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
			       new_exit_e->src);

      /* We have to add phi args to the loop->header here as coming
	 from new_exit_e edge.  */
      for (gsi = gsi_start_phis (loop->header);
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  phi = gsi_stmt (gsi);
	  phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
	  if (phi_arg)
	    add_phi_arg (phi, phi_arg, new_exit_e,
			 gimple_phi_arg_location_from_edge (phi, entry_e));
	}

      redirect_edge_and_branch_force (entry_e, new_loop->header);
      PENDING_STMT (entry_e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
    }

  free (new_bbs);
  free (bbs);

  return new_loop;
}
/* Given the condition statement COND, put it as the last statement
   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
   Assumes that this is the single exit of the guarded loop.
   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       gimple_seq cond_expr_stmt_list,
		       basic_block exit_bb, basic_block dom_bb)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  /* The existing successor of GUARD_BB becomes the "enter the loop"
     (false) edge of the new conditional.  */
  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  /* Gimplify COND; any statements it produces are appended to the
     caller-provided sequence so they precede the new condition.  */
  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
  if (gimplify_stmt_list)
    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  cond_stmt = gimple_build_cond (NE_EXPR,
				 cond, build_int_cst (TREE_TYPE (cond), 0),
				 NULL_TREE, NULL_TREE);
  if (cond_expr_stmt_list)
    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);

  /* Re-fetch the iterator: the insertion above may have changed the
     last statement of GUARD_BB.  */
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.  */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gimple orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);

  /* Peeling relies on up-to-date SSA form.  */
  if (need_ssa_update_p (cfun))
    return false;

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is for
	 the function itself.  */
      || !loop_outer (loop)
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by peeling: FIRST_LOOP's exit block
   must guard SECOND_LOOP as described below.  Compiled only with
   checking enabled.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
				 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.
   */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
	of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
		   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
		  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
		      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
1034 /* If the run time cost model check determines that vectorization is
1035 not profitable and hence scalar loop should be generated then set
1036 FIRST_NITERS to prologue peeled iterations. This will allow all the
1037 iterations to be executed in the prologue peeled scalar loop. */
/* CFG built here:
     cond_bb:   if (scalar_loop_iters <= TH) goto then_bb; else goto
                bb_before_first_loop
     then_bb:   prologue_after_cost_adjust = scalar_loop_iters
     bb_before_first_loop:  PHI merging the adjusted count with the
                original FIRST_NITERS.  */
1039 static void
1040 set_prologue_iterations (basic_block bb_before_first_loop,
1041 tree first_niters,
1042 struct loop *loop,
1043 unsigned int th)
1045 edge e;
1046 basic_block cond_bb, then_bb;
1047 tree var, prologue_after_cost_adjust_name;
1048 gimple_stmt_iterator gsi;
1049 gimple newphi;
1050 edge e_true, e_false, e_fallthru;
1051 gimple cond_stmt;
1052 gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1053 tree cost_pre_condition = NULL_TREE;
1054 tree scalar_loop_iters =
1055 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1057 e = single_pred_edge (bb_before_first_loop);
1058 cond_bb = split_edge(e);
1060 e = single_pred_edge (bb_before_first_loop);
1061 then_bb = split_edge(e);
1062 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1064 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1065 EDGE_FALSE_VALUE);
1066 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
/* Retarget the fall-through edge cond_bb -> then_bb as the TRUE edge of
   the condition built below.  */
1068 e_true = EDGE_PRED (then_bb, 0);
1069 e_true->flags &= ~EDGE_FALLTHRU;
1070 e_true->flags |= EDGE_TRUE_VALUE;
1072 e_fallthru = EDGE_SUCC (then_bb, 0);
/* Condition: scalar_loop_iters <= TH, i.e. the run-time iteration count
   is below the profitability threshold.  */
1074 cost_pre_condition =
1075 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1076 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1077 cost_pre_condition =
1078 force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1079 true, NULL_TREE);
1080 cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1081 build_int_cst (TREE_TYPE (cost_pre_condition),
1082 0), NULL_TREE, NULL_TREE);
1084 gsi = gsi_last_bb (cond_bb);
1085 if (gimplify_stmt_list)
1086 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1088 gsi = gsi_last_bb (cond_bb);
1089 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1091 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1092 "prologue_after_cost_adjust");
1093 add_referenced_var (var);
1094 prologue_after_cost_adjust_name =
1095 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1097 gsi = gsi_last_bb (then_bb);
1098 if (stmts)
1099 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1101 newphi = create_phi_node (var, bb_before_first_loop);
1102 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1103 UNKNOWN_LOCATION);
1104 add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
/* NOTE(review): FIRST_NITERS is passed by value, so this assignment is
   dead as far as the caller is concerned — the caller's FIRST_NITERS
   still names the pre-PHI value.  Verify whether callers should receive
   the PHI result (e.g. via a tree * parameter).  */
1106 first_niters = PHI_RESULT (newphi);
1110 /* Remove dead assignments from loop NEW_LOOP. */
1112 static void
1113 remove_dead_stmts_from_loop (struct loop *new_loop)
1115 basic_block *bbs = get_loop_body (new_loop);
1116 unsigned i;
1117 for (i = 0; i < new_loop->num_nodes; ++i)
1119 gimple_stmt_iterator gsi;
1120 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
1122 gimple stmt = gsi_stmt (gsi);
1123 if (is_gimple_assign (stmt)
1124 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
1125 && has_zero_uses (gimple_assign_lhs (stmt)))
1127 gsi_remove (&gsi, true);
1128 release_defs (stmt);
1130 else
1131 gsi_next (&gsi);
1134 free (bbs);
1138 /* Function slpeel_tree_peel_loop_to_edge.
1140 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1141 that is placed on the entry (exit) edge E of LOOP. After this transformation
1142 we have two loops one after the other - first-loop iterates FIRST_NITERS
1143 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1144 If the cost model indicates that it is profitable to emit a scalar
1145 loop instead of the vector one, then the prolog (epilog) loop will iterate
1146 for the entire unchanged scalar iterations of the loop.
1148 Input:
1149 - LOOP: the loop to be peeled.
1150 - E: the exit or entry edge of LOOP.
1151 If it is the entry edge, we peel the first iterations of LOOP. In this
1152 case first-loop is LOOP, and second-loop is the newly created loop.
1153 If it is the exit edge, we peel the last iterations of LOOP. In this
1154 case, first-loop is the newly created loop, and second-loop is LOOP.
1155 - NITERS: the number of iterations that LOOP iterates.
1156 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1157 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1158 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1159 is false, the caller of this function may want to take care of this
1160 (this can be useful if we don't want new stmts added to first-loop).
1161 - TH: cost model profitability threshold of iterations for vectorization.
1162 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1163 during versioning and hence needs to occur during
1164 prologue generation or whether cost model check
1165 has not occurred during prologue generation and hence
1166 needs to occur during epilogue generation.
1169 Output:
1170 The function returns a pointer to the new loop-copy, or NULL if it failed
1171 to perform the transformation.
1173 The function generates two if-then-else guards: one before the first loop,
1174 and the other before the second loop:
1175 The first guard is:
1176 if (FIRST_NITERS == 0) then skip the first loop,
1177 and go directly to the second loop.
1178 The second guard is:
1179 if (FIRST_NITERS == NITERS) then skip the second loop.
1181 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1182 then the generated condition is combined with COND_EXPR and the
1183 statements in COND_EXPR_STMT_LIST are emitted together with it.
1185 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1186 FORNOW the resulting code will not be in loop-closed-ssa form.
1189 static struct loop*
1190 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1191 edge e, tree first_niters,
1192 tree niters, bool update_first_loop_count,
1193 unsigned int th, bool check_profitability,
1194 tree cond_expr, gimple_seq cond_expr_stmt_list)
1196 struct loop *new_loop = NULL, *first_loop, *second_loop;
1197 edge skip_e;
1198 tree pre_condition = NULL_TREE;
1199 bitmap definitions;
1200 basic_block bb_before_second_loop, bb_after_second_loop;
1201 basic_block bb_before_first_loop;
1202 basic_block bb_between_loops;
1203 basic_block new_exit_bb;
1204 edge exit_e = single_exit (loop);
1205 LOC loop_loc;
1206 tree cost_pre_condition = NULL_TREE;
1208 if (!slpeel_can_duplicate_loop_p (loop, e))
1209 return NULL;
1211 /* We have to initialize cfg_hooks. Then, when calling
1212 cfg_hooks->split_edge, the function tree_split_edge
1213 is actually called and, when calling cfg_hooks->duplicate_block,
1214 the function tree_duplicate_bb is called. */
1215 gimple_register_cfg_hooks ();
1218 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1219 Resulting CFG would be:
1221 first_loop:
1222 do {
1223 } while ...
1225 second_loop:
1226 do {
1227 } while ...
1229 orig_exit_bb:
1232 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1234 loop_loc = find_loop_location (loop);
1235 if (dump_file && (dump_flags & TDF_DETAILS))
1237 if (loop_loc != UNKNOWN_LOC)
1238 fprintf (dump_file, "\n%s:%d: note: ",
1239 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1240 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1242 return NULL;
/* Debug statements referencing the peeled IVs will need their operands
   adjusted; ADJUST_VEC records the adjustments and is flushed by
   adjust_vec_debug_stmts () near the end of this function.  */
1245 if (MAY_HAVE_DEBUG_STMTS)
1247 gcc_assert (!adjust_vec);
1248 adjust_vec = VEC_alloc (adjust_info, stack, 32);
1251 if (e == exit_e)
1253 /* NEW_LOOP was placed after LOOP. */
1254 first_loop = loop;
1255 second_loop = new_loop;
1257 else
1259 /* NEW_LOOP was placed before LOOP. */
1260 first_loop = new_loop;
1261 second_loop = loop;
/* Collect the SSA names to be replaced, fix up the PHIs linking the two
   loop copies, and give the copy fresh SSA names.  */
1264 definitions = ssa_names_to_replace ();
1265 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1266 rename_variables_in_loop (new_loop);
1269 /* 2. Add the guard code in one of the following ways:
1271 2.a Add the guard that controls whether the first loop is executed.
1272 This occurs when this function is invoked for prologue or epilogue
1273 generation and when the cost model check can be done at compile time.
1275 Resulting CFG would be:
1277 bb_before_first_loop:
1278 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1279 GOTO first-loop
1281 first_loop:
1282 do {
1283 } while ...
1285 bb_before_second_loop:
1287 second_loop:
1288 do {
1289 } while ...
1291 orig_exit_bb:
1293 2.b Add the cost model check that allows the prologue
1294 to iterate for the entire unchanged scalar
1295 iterations of the loop in the event that the cost
1296 model indicates that the scalar loop is more
1297 profitable than the vector one. This occurs when
1298 this function is invoked for prologue generation
1299 and the cost model check needs to be done at run
1300 time.
1302 Resulting CFG after prologue peeling would be:
1304 if (scalar_loop_iterations <= th)
1305 FIRST_NITERS = scalar_loop_iterations
1307 bb_before_first_loop:
1308 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1309 GOTO first-loop
1311 first_loop:
1312 do {
1313 } while ...
1315 bb_before_second_loop:
1317 second_loop:
1318 do {
1319 } while ...
1321 orig_exit_bb:
1323 2.c Add the cost model check that allows the epilogue
1324 to iterate for the entire unchanged scalar
1325 iterations of the loop in the event that the cost
1326 model indicates that the scalar loop is more
1327 profitable than the vector one. This occurs when
1328 this function is invoked for epilogue generation
1329 and the cost model check needs to be done at run
1330 time. This check is combined with any pre-existing
1331 check in COND_EXPR to avoid versioning.
1333 Resulting CFG after prologue peeling would be:
1335 bb_before_first_loop:
1336 if ((scalar_loop_iterations <= th)
1338 FIRST_NITERS == 0) GOTO bb_before_second_loop
1339 GOTO first-loop
1341 first_loop:
1342 do {
1343 } while ...
1345 bb_before_second_loop:
1347 second_loop:
1348 do {
1349 } while ...
1351 orig_exit_bb:
1354 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1355 bb_before_second_loop = split_edge (single_exit (first_loop));
1357 /* Epilogue peeling. */
1358 if (!update_first_loop_count)
1360 pre_condition =
1361 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1362 build_int_cst (TREE_TYPE (first_niters), 0));
1363 if (check_profitability)
1365 tree scalar_loop_iters
1366 = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1367 (loop_vec_info_for_loop (loop)));
1368 cost_pre_condition =
1369 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1370 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1372 pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1373 cost_pre_condition, pre_condition);
/* Fold the caller's versioning condition into the guard, negated so
   that a failing COND_EXPR also routes control to the scalar loop.  */
1375 if (cond_expr)
1377 pre_condition =
1378 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1379 pre_condition,
1380 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1381 cond_expr));
1385 /* Prologue peeling. */
1386 else
1388 if (check_profitability)
1389 set_prologue_iterations (bb_before_first_loop, first_niters,
1390 loop, th);
1392 pre_condition =
1393 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1394 build_int_cst (TREE_TYPE (first_niters), 0));
1397 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1398 cond_expr_stmt_list,
1399 bb_before_second_loop, bb_before_first_loop);
1400 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1401 first_loop == new_loop,
1402 &new_exit_bb, &definitions);
1405 /* 3. Add the guard that controls whether the second loop is executed.
1406 Resulting CFG would be:
1408 bb_before_first_loop:
1409 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1410 GOTO first-loop
1412 first_loop:
1413 do {
1414 } while ...
1416 bb_between_loops:
1417 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1418 GOTO bb_before_second_loop
1420 bb_before_second_loop:
1422 second_loop:
1423 do {
1424 } while ...
1426 bb_after_second_loop:
1428 orig_exit_bb:
1431 bb_between_loops = new_exit_bb;
1432 bb_after_second_loop = split_edge (single_exit (second_loop));
1434 pre_condition =
1435 fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1436 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1437 bb_after_second_loop, bb_before_first_loop);
1438 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1439 second_loop == new_loop, &new_exit_bb);
1441 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1443 if (update_first_loop_count)
1444 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1446 /* Remove all pattern statements from the loop copy. They will confuse
1447 the expander if DCE is disabled.
1448 ??? The pattern recognizer should be split into an analysis and
1449 a transformation phase that is then run only on the loop that is
1450 going to be transformed. */
1451 remove_dead_stmts_from_loop (new_loop);
1453 adjust_vec_debug_stmts ();
1455 BITMAP_FREE (definitions);
1456 delete_update_ssa ();
1458 return new_loop;
1461 /* Function find_loop_location.
1463 Extract the location of the loop in the source code.
1464 If the loop is not well formed for vectorization, an estimated
1465 location is calculated.
1466 Return the loop location if it succeeds and UNKNOWN_LOC if not. */
1469 find_loop_location (struct loop *loop)
1471 gimple stmt = NULL;
1472 basic_block bb;
1473 gimple_stmt_iterator si;
1475 if (!loop)
1476 return UNKNOWN_LOC;
/* Prefer the location of the exit condition when the loop has one.  */
1478 stmt = get_loop_exit_condition (loop);
1480 if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1481 return gimple_location (stmt);
1483 /* If we got here the loop is probably not "well formed",
1484 try to estimate the loop location */
1486 if (!loop->header)
1487 return UNKNOWN_LOC;
1489 bb = loop->header;
/* Fall back to the first header statement that carries a location.  */
1491 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1493 stmt = gsi_stmt (si);
1494 if (gimple_location (stmt) != UNKNOWN_LOC)
1495 return gimple_location (stmt);
1498 return UNKNOWN_LOC;
1502 /* This function builds ni_name = number of iterations loop executes
1503 on the loop preheader. If SEQ is given the stmt is instead emitted
1504 there. */
1506 static tree
1507 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1509 tree ni_name, var;
1510 gimple_seq stmts = NULL;
1511 edge pe;
1512 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1513 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1515 var = create_tmp_var (TREE_TYPE (ni), "niters");
1516 add_referenced_var (var);
1517 ni_name = force_gimple_operand (ni, &stmts, false, var);
1519 pe = loop_preheader_edge (loop);
1520 if (stmts)
1522 if (seq)
1523 gimple_seq_add_seq (&seq, stmts);
1524 else
1526 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1527 gcc_assert (!new_bb);
1531 return ni_name;
1535 /* This function generates the following statements:
1537 ni_name = number of iterations loop executes
1538 ratio = ni_name / vf
1539 ratio_mult_vf_name = ratio * vf
1541 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1542 if that is non-NULL. */
1544 static void
1545 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1546 tree *ni_name_ptr,
1547 tree *ratio_mult_vf_name_ptr,
1548 tree *ratio_name_ptr,
1549 gimple_seq cond_expr_stmt_list)
1552 edge pe;
1553 basic_block new_bb;
1554 gimple_seq stmts;
1555 tree ni_name;
1556 tree var;
1557 tree ratio_name;
1558 tree ratio_mult_vf_name;
1559 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1560 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1561 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1562 tree log_vf;
1564 pe = loop_preheader_edge (loop);
1566 /* Generate temporary variable that contains
1567 number of iterations loop executes. */
1569 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
/* VF is assumed to be a power of two here (exact_log2), so the
   division and multiplication by VF below become shifts.  */
1570 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1572 /* Create: ratio = ni >> log2(vf) */
1574 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1575 if (!is_gimple_val (ratio_name))
1577 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1578 add_referenced_var (var);
1580 stmts = NULL;
1581 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1582 if (cond_expr_stmt_list)
1583 gimple_seq_add_seq (&cond_expr_stmt_list, stmts)
1584 else
1586 pe = loop_preheader_edge (loop);
1587 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1588 gcc_assert (!new_bb);
1592 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1594 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1595 ratio_name, log_vf);
1596 if (!is_gimple_val (ratio_mult_vf_name))
1598 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1599 add_referenced_var (var);
1601 stmts = NULL;
1602 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1603 true, var);
1604 if (cond_expr_stmt_list)
1605 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1606 else
1608 pe = loop_preheader_edge (loop);
1609 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1610 gcc_assert (!new_bb);
1614 *ni_name_ptr = ni_name;
1615 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1616 *ratio_name_ptr = ratio_name;
1618 return;
1621 /* Function vect_can_advance_ivs_p
1623 In case the number of iterations that LOOP iterates is unknown at compile
1624 time, an epilog loop will be generated, and the loop induction variables
1625 (IVs) will be "advanced" to the value they are supposed to take just before
1626 the epilog loop. Here we check that the access function of the loop IVs
1627 and the expression that represents the loop bound are simple enough.
1628 These restrictions will be relaxed in the future.
   Returns false as soon as one header PHI has no access function, no
   evolution in this loop, or a chrec (polynomial) evolution part.  */
1630 bool
1631 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1633 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1634 basic_block bb = loop->header;
1635 gimple phi;
1636 gimple_stmt_iterator gsi;
1638 /* Analyze phi functions of the loop header. */
1640 if (vect_print_dump_info (REPORT_DETAILS))
1641 fprintf (vect_dump, "vect_can_advance_ivs_p:");
1643 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1645 tree access_fn = NULL;
1646 tree evolution_part;
1648 phi = gsi_stmt (gsi);
1649 if (vect_print_dump_info (REPORT_DETAILS))
1651 fprintf (vect_dump, "Analyze phi: ");
1652 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1655 /* Skip virtual phi's. The data dependences that are associated with
1656 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1658 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1660 if (vect_print_dump_info (REPORT_DETAILS))
1661 fprintf (vect_dump, "virtual phi. skip.");
1662 continue;
1665 /* Skip reduction phis. */
1667 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1669 if (vect_print_dump_info (REPORT_DETAILS))
1670 fprintf (vect_dump, "reduc phi. skip.");
1671 continue;
1674 /* Analyze the evolution function. */
1676 access_fn = instantiate_parameters
1677 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1679 if (!access_fn)
1681 if (vect_print_dump_info (REPORT_DETAILS))
1682 fprintf (vect_dump, "No Access function.");
1683 return false;
1686 if (vect_print_dump_info (REPORT_DETAILS))
1688 fprintf (vect_dump, "Access function of PHI: ");
1689 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1692 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1694 if (evolution_part == NULL_TREE)
1696 if (vect_print_dump_info (REPORT_DETAILS))
1697 fprintf (vect_dump, "No evolution.");
1698 return false;
1701 /* FORNOW: We do not transform initial conditions of IVs
1702 which evolution functions are a polynomial of degree >= 2. */
1704 if (tree_is_chrec (evolution_part))
1705 return false;
1708 return true;
1712 /* Function vect_update_ivs_after_vectorizer.
1714 "Advance" the induction variables of LOOP to the value they should take
1715 after the execution of LOOP. This is currently necessary because the
1716 vectorizer does not handle induction variables that are used after the
1717 loop. Such a situation occurs when the last iterations of LOOP are
1718 peeled, because:
1719 1. We introduced new uses after LOOP for IVs that were not originally used
1720 after LOOP: the IVs of LOOP are now used by an epilog loop.
1721 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1722 times, whereas the loop IVs should be bumped N times.
1724 Input:
1725 - LOOP - a loop that is going to be vectorized. The last few iterations
1726 of LOOP were peeled.
1727 - NITERS - the number of iterations that LOOP executes (before it is
1728 vectorized). i.e, the number of times the ivs should be bumped.
1729 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1730 coming out from LOOP on which there are uses of the LOOP ivs
1731 (this is the path from LOOP->exit to epilog_loop->preheader).
1733 The new definitions of the ivs are placed in LOOP->exit.
1734 The phi args associated with the edge UPDATE_E in the bb
1735 UPDATE_E->dest are updated accordingly.
1737 Assumption 1: Like the rest of the vectorizer, this function assumes
1738 a single loop exit that has a single predecessor.
1740 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1741 organized in the same order.
1743 Assumption 3: The access function of the ivs is simple enough (see
1744 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1746 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1747 coming out of LOOP on which the ivs of LOOP are used (this is the path
1748 that leads to the epilog loop; other paths skip the epilog loop). This
1749 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1750 needs to have its phis updated.
1753 static void
1754 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1755 edge update_e)
1757 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1758 basic_block exit_bb = single_exit (loop)->dest;
1759 gimple phi, phi1;
1760 gimple_stmt_iterator gsi, gsi1;
1761 basic_block update_bb = update_e->dest;
1763 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1765 /* Make sure there exists a single-predecessor exit bb: */
1766 gcc_assert (single_pred_p (exit_bb));
/* Walk the header PHIs and the PHIs of UPDATE_BB in lockstep (see
   Assumption 2 above: both lists are in the same order).  */
1768 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1769 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1770 gsi_next (&gsi), gsi_next (&gsi1))
1772 tree access_fn = NULL;
1773 tree evolution_part;
1774 tree init_expr;
1775 tree step_expr, off;
1776 tree type;
1777 tree var, ni, ni_name;
1778 gimple_stmt_iterator last_gsi;
1780 phi = gsi_stmt (gsi);
1781 phi1 = gsi_stmt (gsi1);
1782 if (vect_print_dump_info (REPORT_DETAILS))
1784 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1785 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1788 /* Skip virtual phi's. */
1789 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1791 if (vect_print_dump_info (REPORT_DETAILS))
1792 fprintf (vect_dump, "virtual phi. skip.");
1793 continue;
1796 /* Skip reduction phis. */
1797 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1799 if (vect_print_dump_info (REPORT_DETAILS))
1800 fprintf (vect_dump, "reduc phi. skip.");
1801 continue;
1804 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1805 gcc_assert (access_fn);
1806 /* We can end up with an access_fn like
1807 (short int) {(short unsigned int) i_49, +, 1}_1
1808 for further analysis we need to strip the outer cast but we
1809 need to preserve the original type. */
1810 type = TREE_TYPE (access_fn);
1811 STRIP_NOPS (access_fn);
1812 evolution_part =
1813 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1814 gcc_assert (evolution_part != NULL_TREE);
1816 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1817 of degree >= 2 or exponential. */
1818 gcc_assert (!tree_is_chrec (evolution_part));
1820 step_expr = evolution_part;
1821 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1822 loop->num));
1823 init_expr = fold_convert (type, init_expr);
/* Compute NI = INIT_EXPR + NITERS * STEP_EXPR; pointer IVs use
   POINTER_PLUS_EXPR with a sizetype offset.  */
1825 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1826 fold_convert (TREE_TYPE (step_expr), niters),
1827 step_expr);
1828 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1829 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1830 init_expr,
1831 fold_convert (sizetype, off));
1832 else
1833 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1834 init_expr,
1835 fold_convert (TREE_TYPE (init_expr), off));
1837 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1838 add_referenced_var (var);
/* Materialize NI at the end of the exit block so the epilog PHIs can
   refer to it.  */
1840 last_gsi = gsi_last_bb (exit_bb);
1841 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1842 true, GSI_SAME_STMT);
1844 /* Fix phi expressions in the successor bb. */
1845 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1849 /* Return the more conservative threshold between the
1850 min_profitable_iters returned by the cost model and the user
1851 specified threshold, if provided. */
1853 static unsigned int
1854 conservative_cost_threshold (loop_vec_info loop_vinfo,
1855 int min_profitable_iters)
1857 unsigned int th;
1858 int min_scalar_loop_bound;
1860 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1861 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1863 /* Use the cost model only if it is more conservative than user specified
1864 threshold. */
1865 th = (unsigned) min_scalar_loop_bound;
1866 if (min_profitable_iters
1867 && (!min_scalar_loop_bound
1868 || min_profitable_iters > min_scalar_loop_bound))
1869 th = (unsigned) min_profitable_iters;
1871 if (th && vect_print_dump_info (REPORT_COST))
1872 fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1874 return th;
1877 /* Function vect_do_peeling_for_loop_bound
1879 Peel the last iterations of the loop represented by LOOP_VINFO.
1880 The peeled iterations form a new epilog loop. Given that the loop now
1881 iterates NITERS times, the new epilog loop iterates
1882 NITERS % VECTORIZATION_FACTOR times.
1884 The original loop will later be made to iterate
1885 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1887 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1888 test. */
1890 void
1891 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1892 tree cond_expr, gimple_seq cond_expr_stmt_list)
1894 tree ni_name, ratio_mult_vf_name;
1895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1896 struct loop *new_loop;
1897 edge update_e;
1898 basic_block preheader;
1899 int loop_num;
1900 bool check_profitability = false;
1901 unsigned int th = 0;
1902 int min_profitable_iters;
1904 if (vect_print_dump_info (REPORT_DETAILS))
1905 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1907 initialize_original_copy_tables ();
1909 /* Generate the following variables on the preheader of original loop:
1911 ni_name = number of iteration the original loop executes
1912 ratio = ni_name / vf
1913 ratio_mult_vf_name = ratio * vf */
1914 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1915 &ratio_mult_vf_name, ratio,
1916 cond_expr_stmt_list);
1918 loop_num = loop->num;
1920 /* If cost model check not done during versioning and
1921 peeling for alignment. */
1922 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1923 && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1924 && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1925 && !cond_expr)
1927 check_profitability = true;
1929 /* Get profitability threshold for vectorized loop. */
1930 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1932 th = conservative_cost_threshold (loop_vinfo,
1933 min_profitable_iters);
/* Peel NITERS % VF iterations into an epilog loop on the exit edge;
   the epilog is the returned NEW_LOOP.  */
1936 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1937 ratio_mult_vf_name, ni_name, false,
1938 th, check_profitability,
1939 cond_expr, cond_expr_stmt_list);
1940 gcc_assert (new_loop);
1941 gcc_assert (loop_num == loop->num);
1942 #ifdef ENABLE_CHECKING
1943 slpeel_verify_cfg_after_peeling (loop, new_loop);
1944 #endif
1946 /* A guard that controls whether the new_loop is to be executed or skipped
1947 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1948 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1949 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1950 is on the path where the LOOP IVs are used and need to be updated. */
1952 preheader = loop_preheader_edge (new_loop)->src;
1953 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1954 update_e = EDGE_PRED (preheader, 0);
1955 else
1956 update_e = EDGE_PRED (preheader, 1);
1958 /* Update IVs of original loop as if they were advanced
1959 by ratio_mult_vf_name steps. */
1960 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1962 /* After peeling we have to reset scalar evolution analyzer. */
1963 scev_reset ();
1965 free_original_copy_tables ();
1969 /* Function vect_gen_niters_for_prolog_loop
1971 Set the number of iterations for the loop represented by LOOP_VINFO
1972 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1973 and the misalignment of DR - the data reference recorded in
1974 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1975 this loop, the data reference DR will refer to an aligned location.
1977 The following computation is generated:
1979 If the misalignment of DR is known at compile time:
1980 addr_mis = int mis = DR_MISALIGNMENT (dr);
1981 Else, compute address misalignment in bytes:
1982 addr_mis = addr & (vectype_size - 1)
1984 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1986 (elem_size = element type size; an element is the scalar element whose type
1987 is the inner type of the vectype)
1989 When the step of the data-ref in the loop is not 1 (as in interleaved data
1990 and SLP), the number of iterations of the prolog must be divided by the step
1991 (which is equal to the size of interleaved group).
1993 The above formulas assume that VF == number of elements in the vector. This
1994 may not hold when there are multiple-types in the loop.
1995 In this case, for some data-references in the loop the VF does not represent
1996 the number of elements that fit in the vector. Therefore, instead of VF we
1997 use TYPE_VECTOR_SUBPARTS. */
1999 static tree
2000 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
2001 tree *wide_prolog_niters)
2003 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2004 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2005 tree var;
2006 gimple_seq stmts;
2007 tree iters, iters_name;
2008 edge pe;
2009 basic_block new_bb;
2010 gimple dr_stmt = DR_STMT (dr);
2011 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2012 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2013 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2014 tree niters_type = TREE_TYPE (loop_niters);
2015 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
2017 pe = loop_preheader_edge (loop);
2019 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2021 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2023 if (vect_print_dump_info (REPORT_DETAILS))
2024 fprintf (vect_dump, "known peeling = %d.", npeel);
2026 iters = build_int_cst (niters_type, npeel);
2028 else
2030 gimple_seq new_stmts = NULL;
2031 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
2032 tree offset = negative
2033 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2034 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
2035 &new_stmts, offset, loop);
2036 tree ptr_type = TREE_TYPE (start_addr);
2037 tree size = TYPE_SIZE (ptr_type);
2038 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2039 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2040 tree elem_size_log =
2041 build_int_cst (type, exact_log2 (vectype_align/nelements));
2042 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2043 tree nelements_tree = build_int_cst (type, nelements);
2044 tree byte_misalign;
2045 tree elem_misalign;
2047 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2048 gcc_assert (!new_bb);
2050 /* Create: byte_misalign = addr & (vectype_size - 1) */
2051 byte_misalign =
2052 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
2053 vectype_size_minus_1);
2055 /* Create: elem_misalign = byte_misalign / element_size */
2056 elem_misalign =
2057 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2059 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
2060 if (negative)
2061 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
2062 else
2063 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2064 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2065 iters = fold_convert (niters_type, iters);
2068 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2069 /* If the loop bound is known at compile time we already verified that it is
2070 greater than vf; since the misalignment ('iters') is at most vf, there's
2071 no need to generate the MIN_EXPR in this case. */
2072 if (TREE_CODE (loop_niters) != INTEGER_CST)
2073 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2075 if (vect_print_dump_info (REPORT_DETAILS))
2077 fprintf (vect_dump, "niters for prolog loop: ");
2078 print_generic_expr (vect_dump, iters, TDF_SLIM);
2081 var = create_tmp_var (niters_type, "prolog_loop_niters");
2082 add_referenced_var (var);
2083 stmts = NULL;
2084 iters_name = force_gimple_operand (iters, &stmts, false, var);
2085 if (types_compatible_p (sizetype, niters_type))
2086 *wide_prolog_niters = iters_name;
2087 else
2089 gimple_seq seq = NULL;
2090 tree wide_iters = fold_convert (sizetype, iters);
2091 var = create_tmp_var (sizetype, "prolog_loop_niters");
2092 add_referenced_var (var);
2093 *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2094 var);
2095 if (seq)
2096 gimple_seq_add_seq (&stmts, seq);
2099 /* Insert stmt on loop preheader edge. */
2100 if (stmts)
2102 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2103 gcc_assert (!new_bb);
2106 return iters_name;
2110 /* Function vect_update_init_of_dr
2112 NITERS iterations were peeled from LOOP. DR represents a data reference
2113 in LOOP. This function updates the information recorded in DR to
2114 account for the fact that the first NITERS iterations had already been
2115 executed. Specifically, it updates the OFFSET field of DR. */
2117 static void
2118 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2120 tree offset = DR_OFFSET (dr);
2122 niters = fold_build2 (MULT_EXPR, sizetype,
2123 fold_convert (sizetype, niters),
2124 fold_convert (sizetype, DR_STEP (dr)));
2125 offset = fold_build2 (PLUS_EXPR, sizetype,
2126 fold_convert (sizetype, offset), niters);
2127 DR_OFFSET (dr) = offset;
2131 /* Function vect_update_inits_of_drs
2133 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2134 This function updates the information recorded for the data references in
2135 the loop to account for the fact that the first NITERS iterations had
2136 already been executed. Specifically, it updates the initial_condition of
2137 the access_function of all the data_references in the loop. */
2139 static void
2140 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2142 unsigned int i;
2143 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2144 struct data_reference *dr;
2146 if (vect_print_dump_info (REPORT_DETAILS))
2147 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2149 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2150 vect_update_init_of_dr (dr, niters);
2154 /* Function vect_do_peeling_for_alignment
2156 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2157 'niters' is set to the misalignment of one of the data references in the
2158 loop, thereby forcing it to refer to an aligned location at the beginning
2159 of the execution of this loop. The data reference for which we are
2160 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2162 void
2163 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2165 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2166 tree niters_of_prolog_loop, ni_name;
2167 tree n_iters;
2168 tree wide_prolog_niters;
2169 struct loop *new_loop;
2170 unsigned int th = 0;
2171 int min_profitable_iters;
2173 if (vect_print_dump_info (REPORT_DETAILS))
2174 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2176 initialize_original_copy_tables ();
2178 ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2179 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
2180 &wide_prolog_niters);
2183 /* Get profitability threshold for vectorized loop. */
2184 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2185 th = conservative_cost_threshold (loop_vinfo,
2186 min_profitable_iters);
2188 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2189 new_loop =
2190 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2191 niters_of_prolog_loop, ni_name, true,
2192 th, true, NULL_TREE, NULL);
2194 gcc_assert (new_loop);
2195 #ifdef ENABLE_CHECKING
2196 slpeel_verify_cfg_after_peeling (new_loop, loop);
2197 #endif
2199 /* Update number of times loop executes. */
2200 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2201 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2202 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2204 /* Update the init conditions of the access functions of all data refs. */
2205 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2207 /* After peeling we have to reset scalar evolution analyzer. */
2208 scev_reset ();
2210 free_original_copy_tables ();
2214 /* Function vect_create_cond_for_align_checks.
2216 Create a conditional expression that represents the alignment checks for
2217 all of data references (array element references) whose alignment must be
2218 checked at runtime.
2220 Input:
2221 COND_EXPR - input conditional expression. New conditions will be chained
2222 with logical AND operation.
2223 LOOP_VINFO - two fields of the loop information are used.
2224 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2225 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2227 Output:
2228 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2229 expression.
2230 The returned value is the conditional expression to be used in the if
2231 statement that controls which version of the loop gets executed at runtime.
2233 The algorithm makes two assumptions:
2234 1) The number of bytes "n" in a vector is a power of 2.
2235 2) An address "a" is aligned if a%n is zero and that this
2236 test can be done as a&(n-1) == 0. For example, for 16
2237 byte vectors the test is a&0xf == 0. */
2239 static void
2240 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2241 tree *cond_expr,
2242 gimple_seq *cond_expr_stmt_list)
2244 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2245 VEC(gimple,heap) *may_misalign_stmts
2246 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2247 gimple ref_stmt;
2248 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2249 tree mask_cst;
2250 unsigned int i;
2251 tree psize;
2252 tree int_ptrsize_type;
2253 char tmp_name[20];
2254 tree or_tmp_name = NULL_TREE;
2255 tree and_tmp, and_tmp_name;
2256 gimple and_stmt;
2257 tree ptrsize_zero;
2258 tree part_cond_expr;
2260 /* Check that mask is one less than a power of 2, i.e., mask is
2261 all zeros followed by all ones. */
2262 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2264 /* CHECKME: what is the best integer or unsigned type to use to hold a
2265 cast from a pointer value? */
2266 psize = TYPE_SIZE (ptr_type_node);
2267 int_ptrsize_type
2268 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2270 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2271 of the first vector of the i'th data reference. */
2273 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
2275 gimple_seq new_stmt_list = NULL;
2276 tree addr_base;
2277 tree addr_tmp, addr_tmp_name;
2278 tree or_tmp, new_or_tmp_name;
2279 gimple addr_stmt, or_stmt;
2280 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2281 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2282 bool negative = tree_int_cst_compare
2283 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2284 tree offset = negative
2285 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2287 /* create: addr_tmp = (int)(address_of_first_vector) */
2288 addr_base =
2289 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2290 offset, loop);
2291 if (new_stmt_list != NULL)
2292 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2294 sprintf (tmp_name, "%s%d", "addr2int", i);
2295 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2296 add_referenced_var (addr_tmp);
2297 addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2298 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2299 addr_base, NULL_TREE);
2300 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2301 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2303 /* The addresses are OR together. */
2305 if (or_tmp_name != NULL_TREE)
2307 /* create: or_tmp = or_tmp | addr_tmp */
2308 sprintf (tmp_name, "%s%d", "orptrs", i);
2309 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2310 add_referenced_var (or_tmp);
2311 new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2312 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2313 new_or_tmp_name,
2314 or_tmp_name, addr_tmp_name);
2315 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2316 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2317 or_tmp_name = new_or_tmp_name;
2319 else
2320 or_tmp_name = addr_tmp_name;
2322 } /* end for i */
2324 mask_cst = build_int_cst (int_ptrsize_type, mask);
2326 /* create: and_tmp = or_tmp & mask */
2327 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2328 add_referenced_var (and_tmp);
2329 and_tmp_name = make_ssa_name (and_tmp, NULL);
2331 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2332 or_tmp_name, mask_cst);
2333 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2334 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2336 /* Make and_tmp the left operand of the conditional test against zero.
2337 if and_tmp has a nonzero bit then some address is unaligned. */
2338 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2339 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2340 and_tmp_name, ptrsize_zero);
2341 if (*cond_expr)
2342 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2343 *cond_expr, part_cond_expr);
2344 else
2345 *cond_expr = part_cond_expr;
2349 /* Function vect_vfa_segment_size.
2351 Create an expression that computes the size of segment
2352 that will be accessed for a data reference. The functions takes into
2353 account that realignment loads may access one more vector.
2355 Input:
2356 DR: The data reference.
2357 VECT_FACTOR: vectorization factor.
2359 Return an expression whose value is the size of segment which will be
2360 accessed by DR. */
2362 static tree
2363 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2365 tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2366 DR_STEP (dr), vect_factor);
2368 if (vect_supportable_dr_alignment (dr, false)
2369 == dr_explicit_realign_optimized)
2371 tree vector_size = TYPE_SIZE_UNIT
2372 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2374 segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2375 segment_length, vector_size);
2377 return fold_convert (sizetype, segment_length);
2381 /* Function vect_create_cond_for_alias_checks.
2383 Create a conditional expression that represents the run-time checks for
2384 overlapping of address ranges represented by a list of data references
2385 relations passed as input.
2387 Input:
2388 COND_EXPR - input conditional expression. New conditions will be chained
2389 with logical AND operation.
2390 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2391 to be checked.
2393 Output:
2394 COND_EXPR - conditional expression.
2395 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2396 expression.
2399 The returned value is the conditional expression to be used in the if
2400 statement that controls which version of the loop gets executed at runtime.
2403 static void
2404 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2405 tree * cond_expr,
2406 gimple_seq * cond_expr_stmt_list)
2408 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2409 VEC (ddr_p, heap) * may_alias_ddrs =
2410 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2411 tree vect_factor =
2412 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2414 ddr_p ddr;
2415 unsigned int i;
2416 tree part_cond_expr;
2418 /* Create expression
2419 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2420 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2424 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2425 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
2427 if (VEC_empty (ddr_p, may_alias_ddrs))
2428 return;
2430 FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
2432 struct data_reference *dr_a, *dr_b;
2433 gimple dr_group_first_a, dr_group_first_b;
2434 tree addr_base_a, addr_base_b;
2435 tree segment_length_a, segment_length_b;
2436 gimple stmt_a, stmt_b;
2437 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2439 dr_a = DDR_A (ddr);
2440 stmt_a = DR_STMT (DDR_A (ddr));
2441 dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2442 if (dr_group_first_a)
2444 stmt_a = dr_group_first_a;
2445 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2448 dr_b = DDR_B (ddr);
2449 stmt_b = DR_STMT (DDR_B (ddr));
2450 dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2451 if (dr_group_first_b)
2453 stmt_b = dr_group_first_b;
2454 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2457 addr_base_a =
2458 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2459 NULL_TREE, loop);
2460 addr_base_b =
2461 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2462 NULL_TREE, loop);
2464 segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2465 segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2467 if (vect_print_dump_info (REPORT_DR_DETAILS))
2469 fprintf (vect_dump,
2470 "create runtime check for data references ");
2471 print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2472 fprintf (vect_dump, " and ");
2473 print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2476 seg_a_min = addr_base_a;
2477 seg_a_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2478 addr_base_a, segment_length_a);
2479 if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2480 seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2482 seg_b_min = addr_base_b;
2483 seg_b_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2484 addr_base_b, segment_length_b);
2485 if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2486 seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2488 part_cond_expr =
2489 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2490 fold_build2 (LT_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2491 fold_build2 (LT_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2493 if (*cond_expr)
2494 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2495 *cond_expr, part_cond_expr);
2496 else
2497 *cond_expr = part_cond_expr;
2500 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2501 fprintf (vect_dump, "created %u versioning for alias checks.\n",
2502 VEC_length (ddr_p, may_alias_ddrs));
2506 /* Function vect_loop_versioning.
2508 If the loop has data references that may or may not be aligned or/and
2509 has data reference relations whose independence was not proven then
2510 two versions of the loop need to be generated, one which is vectorized
2511 and one which isn't. A test is then generated to control which of the
2512 loops is executed. The test checks for the alignment of all of the
2513 data references that may or may not be aligned. An additional
2514 sequence of runtime tests is generated for each pairs of DDRs whose
2515 independence was not proven. The vectorized version of loop is
2516 executed only if both alias and alignment tests are passed.
2518 The test generated to check which version of loop is executed
2519 is modified to also check for profitability as indicated by the
2520 cost model initially.
2522 The versioning precondition(s) are placed in *COND_EXPR and
2523 *COND_EXPR_STMT_LIST. If DO_VERSIONING is true versioning is
2524 also performed, otherwise only the conditions are generated. */
2526 void
2527 vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2528 tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2530 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2531 basic_block condition_bb;
2532 gimple_stmt_iterator gsi, cond_exp_gsi;
2533 basic_block merge_bb;
2534 basic_block new_exit_bb;
2535 edge new_exit_e, e;
2536 gimple orig_phi, new_phi;
2537 tree arg;
2538 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2539 gimple_seq gimplify_stmt_list = NULL;
2540 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2541 int min_profitable_iters = 0;
2542 unsigned int th;
2544 /* Get profitability threshold for vectorized loop. */
2545 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2547 th = conservative_cost_threshold (loop_vinfo,
2548 min_profitable_iters);
2550 *cond_expr =
2551 fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2552 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2554 *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2555 false, NULL_TREE);
2557 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2558 vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2559 cond_expr_stmt_list);
2561 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2562 vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2563 cond_expr_stmt_list);
2565 *cond_expr =
2566 fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2567 *cond_expr =
2568 force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2569 gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2571 /* If we only needed the extra conditions and a new loop copy
2572 bail out here. */
2573 if (!do_versioning)
2574 return;
2576 initialize_original_copy_tables ();
2577 loop_version (loop, *cond_expr, &condition_bb,
2578 prob, prob, REG_BR_PROB_BASE - prob, true);
2579 free_original_copy_tables();
2581 /* Loop versioning violates an assumption we try to maintain during
2582 vectorization - that the loop exit block has a single predecessor.
2583 After versioning, the exit block of both loop versions is the same
2584 basic block (i.e. it has two predecessors). Just in order to simplify
2585 following transformations in the vectorizer, we fix this situation
2586 here by adding a new (empty) block on the exit-edge of the loop,
2587 with the proper loop-exit phis to maintain loop-closed-form. */
2589 merge_bb = single_exit (loop)->dest;
2590 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2591 new_exit_bb = split_edge (single_exit (loop));
2592 new_exit_e = single_exit (loop);
2593 e = EDGE_SUCC (new_exit_bb, 0);
2595 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2597 orig_phi = gsi_stmt (gsi);
2598 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2599 new_exit_bb);
2600 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2601 add_phi_arg (new_phi, arg, new_exit_e,
2602 gimple_phi_arg_location_from_edge (orig_phi, e));
2603 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2606 /* End loop-exit-fixes after versioning. */
2608 update_ssa (TODO_update_ssa);
2609 if (*cond_expr_stmt_list)
2611 cond_exp_gsi = gsi_last_bb (condition_bb);
2612 gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2613 GSI_SAME_STMT);
2614 *cond_expr_stmt_list = NULL;
2616 *cond_expr = NULL_TREE;