gcc/tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "tree-pretty-print.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "diagnostic-core.h"
37 #include "toplev.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
42 /*************************************************************************
43 Simple Loop Peeling Utilities
45 Utilities to support loop peeling for vectorization purposes.
46 *************************************************************************/
49 /* Renames the use *OP_P. */
51 static void
52 rename_use_op (use_operand_p op_p)
54 tree new_name;
56 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
57 return;
59 new_name = get_current_def (USE_FROM_PTR (op_p));
61 /* Something defined outside of the loop. */
62 if (!new_name)
63 return;
65 /* An ordinary ssa name defined in the loop. */
67 SET_USE (op_p, new_name);
71 /* Renames the variables in basic block BB. */
73 void
74 rename_variables_in_bb (basic_block bb)
76 gimple_stmt_iterator gsi;
77 gimple stmt;
78 use_operand_p use_p;
79 ssa_op_iter iter;
80 edge e;
81 edge_iterator ei;
82 struct loop *loop = bb->loop_father;
84 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
86 stmt = gsi_stmt (gsi);
87 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
88 rename_use_op (use_p);
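  /* Also rename the phi arguments on this block's outgoing edges whose
     destination stays inside the loop; those are uses reached from BB.  */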
91 FOR_EACH_EDGE (e, ei, bb->succs)
93 if (!flow_bb_inside_loop_p (loop, e->dest))
94 continue;
95 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
96 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
101 /* Renames the variables in the newly generated LOOP. */
103 void
104 rename_variables_in_loop (struct loop *loop)
106 unsigned i;
107 basic_block *bbs;
109 bbs = get_loop_body (loop);
111 for (i = 0; i < loop->num_nodes; i++)
112 rename_variables_in_bb (bbs[i]);
114 free (bbs);
117 typedef struct
119 tree from, to;
120 basic_block bb;
121 } adjust_info;
123 DEF_VEC_O(adjust_info);
124 DEF_VEC_ALLOC_O_STACK(adjust_info);
125 #define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
127 /* A stack of values to be adjusted in debug stmts. We have to
128 process them LIFO, so that the closest substitution applies. If we
129 processed them FIFO, without the stack, we might substitute uses
130 with a PHI DEF that would soon become non-dominant, and when we got
131 to the suitable one, it wouldn't have anything to substitute any
132 more. */
133 static VEC(adjust_info, stack) *adjust_vec;
135 /* Adjust any debug stmts that referenced AI->from values to use the
136 loop-closed AI->to, if the references are dominated by AI->bb and
137 not by the definition of AI->from. */
139 static void
140 adjust_debug_stmts_now (adjust_info *ai)
142 basic_block bbphi = ai->bb;
143 tree orig_def = ai->from;
144 tree new_def = ai->to;
145 imm_use_iterator imm_iter;
146 gimple stmt;
147 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
149 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
151 /* Adjust any debug stmts that held onto non-loop-closed
152 references. */
153 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
155 use_operand_p use_p;
156 basic_block bbuse;
158 if (!is_gimple_debug (stmt))
159 continue;
161 gcc_assert (gimple_debug_bind_p (stmt));
163 bbuse = gimple_bb (stmt);
165 if ((bbuse == bbphi
166 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
167 && !(bbuse == bbdef
168 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
170 if (new_def)
171 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
172 SET_USE (use_p, new_def);
173 else
175 gimple_debug_bind_reset_value (stmt);
176 update_stmt (stmt);
182 /* Adjust debug stmts as scheduled before. */
184 static void
185 adjust_vec_debug_stmts (void)
187 if (!MAY_HAVE_DEBUG_STMTS)
188 return;
190 gcc_assert (adjust_vec);
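  /* Pop entries off the end so the adjustments are applied LIFO, as
     required by the comment above the declaration of adjust_vec.  */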
192 while (!VEC_empty (adjust_info, adjust_vec))
194 adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
195 VEC_pop (adjust_info, adjust_vec);
198 VEC_free (adjust_info, stack, adjust_vec);
201 /* Adjust any debug stmts that referenced FROM values to use the
202 loop-closed TO, if the references are dominated by BB and not by
203 the definition of FROM. If adjust_vec is non-NULL, adjustments
204 will be postponed until adjust_vec_debug_stmts is called. */
206 static void
207 adjust_debug_stmts (tree from, tree to, basic_block bb)
209 adjust_info ai;
211 if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
212 && SSA_NAME_VAR (from) != gimple_vop (cfun))
214 ai.from = from;
215 ai.to = to;
216 ai.bb = bb;
218 if (adjust_vec)
219 VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
220 else
221 adjust_debug_stmts_now (&ai);
225 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
226 to adjust any debug stmts that referenced the old phi arg,
227 presumably non-loop-closed references left over from other
228 transformations. */
230 static void
231 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
233 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
235 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
237 if (MAY_HAVE_DEBUG_STMTS)
238 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
239 gimple_bb (update_phi));
243 /* Update the PHI nodes of NEW_LOOP.
245 NEW_LOOP is a duplicate of ORIG_LOOP.
246 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
247 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
248 executes before it. */
250 static void
251 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
252 struct loop *new_loop, bool after)
254 tree new_ssa_name;
255 gimple phi_new, phi_orig;
256 tree def;
257 edge orig_loop_latch = loop_latch_edge (orig_loop);
258 edge orig_entry_e = loop_preheader_edge (orig_loop);
259 edge new_loop_exit_e = single_exit (new_loop);
260 edge new_loop_entry_e = loop_preheader_edge (new_loop);
261 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
262 gimple_stmt_iterator gsi_new, gsi_orig;
265 step 1. For each loop-header-phi:
266 Add the first phi argument for the phi in NEW_LOOP
267 (the one associated with the entry of NEW_LOOP)
269 step 2. For each loop-header-phi:
270 Add the second phi argument for the phi in NEW_LOOP
271 (the one associated with the latch of NEW_LOOP)
273 step 3. Update the phis in the successor block of NEW_LOOP.
275 case 1: NEW_LOOP was placed before ORIG_LOOP:
276 The successor block of NEW_LOOP is the header of ORIG_LOOP.
277 Updating the phis in the successor block can therefore be done
278 along with the scanning of the loop header phis, because the
279 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
280 phi nodes, organized in the same order.
282 case 2: NEW_LOOP was placed after ORIG_LOOP:
283 The successor block of NEW_LOOP is the original exit block of
284 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
285 We postpone updating these phis to a later stage (when
286 loop guards are added).
290 /* Scan the phis in the headers of the old and new loops
291 (they are organized in exactly the same order). */
293 for (gsi_new = gsi_start_phis (new_loop->header),
294 gsi_orig = gsi_start_phis (orig_loop->header);
295 !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
296 gsi_next (&gsi_new), gsi_next (&gsi_orig))
298 source_location locus;
299 phi_new = gsi_stmt (gsi_new);
300 phi_orig = gsi_stmt (gsi_orig);
302 /* step 1. */
303 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
304 locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
305 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
307 /* step 2. */
308 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
309 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
310 if (TREE_CODE (def) != SSA_NAME)
311 continue;
313 new_ssa_name = get_current_def (def);
314 if (!new_ssa_name)
316 /* This only happens if there are no definitions
317 inside the loop.  Use the phi_result in this case. */
318 new_ssa_name = PHI_RESULT (phi_new);
321 /* An ordinary ssa name defined in the loop. */
322 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
324 /* Drop any debug references outside the loop, if they would
325 become ill-formed SSA. */
326 adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
328 /* step 3 (case 1). */
329 if (!after)
331 gcc_assert (new_loop_exit_e == orig_entry_e);
332 adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
338 /* Update PHI nodes for a guard of the LOOP.
340 Input:
341 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
342 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
343 originates from the guard-bb, skips LOOP and reaches the (unique) exit
344 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
345 We denote this bb NEW_MERGE_BB because before the guard code was added
346 it had a single predecessor (the LOOP header), and now it became a merge
347 point of two paths - the path that ends with the LOOP exit-edge, and
348 the path that ends with GUARD_EDGE.
349 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
350 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
352 ===> The CFG before the guard-code was added:
353 LOOP_header_bb:
354 loop_body
355 if (exit_loop) goto update_bb
356 else goto LOOP_header_bb
357 update_bb:
359 ==> The CFG after the guard-code was added:
360 guard_bb:
361 if (LOOP_guard_condition) goto new_merge_bb
362 else goto LOOP_header_bb
363 LOOP_header_bb:
364 loop_body
365 if (exit_loop_condition) goto new_merge_bb
366 else goto LOOP_header_bb
367 new_merge_bb:
368 goto update_bb
369 update_bb:
371 ==> The CFG after this function:
372 guard_bb:
373 if (LOOP_guard_condition) goto new_merge_bb
374 else goto LOOP_header_bb
375 LOOP_header_bb:
376 loop_body
377 if (exit_loop_condition) goto new_exit_bb
378 else goto LOOP_header_bb
379 new_exit_bb:
380 new_merge_bb:
381 goto update_bb
382 update_bb:
384 This function:
385 1. creates and updates the relevant phi nodes to account for the new
386 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
387 1.1. Create phi nodes at NEW_MERGE_BB.
388 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
389 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
390 2. preserves loop-closed-ssa-form by creating the required phi nodes
391 at the exit of LOOP (i.e, in NEW_EXIT_BB).
393 There are two flavors to this function:
395 slpeel_update_phi_nodes_for_guard1:
396 Here the guard controls whether we enter or skip LOOP, where LOOP is a
397 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
398 for variables that have phis in the loop header.
400 slpeel_update_phi_nodes_for_guard2:
401 Here the guard controls whether we enter or skip LOOP, where LOOP is an
402 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
403 for variables that have phis in the loop exit.
405 I.E., the overall structure is:
407 loop1_preheader_bb:
408 guard1 (goto loop1/merge1_bb)
409 loop1
410 loop1_exit_bb:
411 guard2 (goto merge1_bb/merge2_bb)
412 merge1_bb
413 loop2
414 loop2_exit_bb
415 merge2_bb
416 next_bb
418 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
419 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
420 that have phis in loop1->header).
422 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
423 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
424 that have phis in next_bb). It also adds some of these phis to
425 loop1_exit_bb.
427 slpeel_update_phi_nodes_for_guard1 is always called before
428 slpeel_update_phi_nodes_for_guard2. They are both needed in order
429 to create correct data-flow and loop-closed-ssa-form.
431 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
432 that change between iterations of a loop (and therefore have a phi-node
433 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
434 phis for variables that are used out of the loop (and therefore have
435 loop-closed exit phis). Some variables may be both updated between
436 iterations and used after the loop. This is why in loop1_exit_bb we
437 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
438 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
440 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
441 an original loop. i.e., we have:
443 orig_loop
444 guard_bb (goto LOOP/new_merge)
445 new_loop <-- LOOP
446 new_exit
447 new_merge
448 next_bb
450 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
451 have:
453 new_loop
454 guard_bb (goto LOOP/new_merge)
455 orig_loop <-- LOOP
456 new_exit
457 new_merge
458 next_bb
460 The SSA names defined in the original loop have a current
461 reaching definition that records the corresponding new
462 ssa-name used in the new duplicated loop copy.
465 /* Function slpeel_update_phi_nodes_for_guard1
467 Input:
468 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
469 - DEFS - a bitmap of ssa names to mark new names for which we recorded
470 information.
472 In the context of the overall structure, we have:
474 loop1_preheader_bb:
475 guard1 (goto loop1/merge1_bb)
476 LOOP-> loop1
477 loop1_exit_bb:
478 guard2 (goto merge1_bb/merge2_bb)
479 merge1_bb
480 loop2
481 loop2_exit_bb
482 merge2_bb
483 next_bb
485 For each name updated between loop iterations (i.e - for each name that has
486 an entry (loop-header) phi in LOOP) we create a new phi in:
487 1. merge1_bb (to account for the edge from guard1)
488 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
491 static void
492 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
493 bool is_new_loop, basic_block *new_exit_bb,
494 bitmap *defs)
496 gimple orig_phi, new_phi;
497 gimple update_phi, update_phi2;
498 tree guard_arg, loop_arg;
499 basic_block new_merge_bb = guard_edge->dest;
500 edge e = EDGE_SUCC (new_merge_bb, 0);
501 basic_block update_bb = e->dest;
502 basic_block orig_bb = loop->header;
503 edge new_exit_e;
504 tree current_new_name;
505 gimple_stmt_iterator gsi_orig, gsi_update;
507 /* Create new bb between loop and new_merge_bb. */
508 *new_exit_bb = split_edge (single_exit (loop));
510 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
512 for (gsi_orig = gsi_start_phis (orig_bb),
513 gsi_update = gsi_start_phis (update_bb);
514 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
515 gsi_next (&gsi_orig), gsi_next (&gsi_update))
517 source_location loop_locus, guard_locus;
518 orig_phi = gsi_stmt (gsi_orig);
519 update_phi = gsi_stmt (gsi_update);
521 /** 1. Handle new-merge-point phis **/
523 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
524 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
525 new_merge_bb);
527 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
528 of LOOP. Set the two phi args in NEW_PHI for these edges: */
529 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
530 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
531 EDGE_SUCC (loop->latch,
532 0));
533 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
534 guard_locus
535 = gimple_phi_arg_location_from_edge (orig_phi,
536 loop_preheader_edge (loop));
538 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
539 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
541 /* 1.3. Update phi in successor block. */
542 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
543 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
544 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
545 update_phi2 = new_phi;
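      /* Remember the merge-point phi: step 2.3 below redirects its argument
         on the loop-exit edge to the new exit phi created in NEW_EXIT_BB.  */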
548 /** 2. Handle loop-closed-ssa-form phis **/
550 if (!is_gimple_reg (PHI_RESULT (orig_phi)))
551 continue;
553 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
554 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
555 *new_exit_bb);
557 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
558 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
560 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
561 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
562 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
563 PHI_RESULT (new_phi));
565 /* 2.4. Record the newly created name with set_current_def.
566 We want to find a name such that
567 name = get_current_def (orig_loop_name)
568 and to set its current definition as follows:
569 set_current_def (name, new_phi_name)
571 If LOOP is a new loop then loop_arg is already the name we're
572 looking for. If LOOP is the original loop, then loop_arg is
573 the orig_loop_name and the relevant name is recorded in its
574 current reaching definition. */
575 if (is_new_loop)
576 current_new_name = loop_arg;
577 else
579 current_new_name = get_current_def (loop_arg);
580 /* current_def is not available only if the variable does not
581 change inside the loop, in which case we also don't care
582 about recording a current_def for it because we won't be
583 trying to create loop-exit-phis for it. */
584 if (!current_new_name)
585 continue;
587 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
589 set_current_def (current_new_name, PHI_RESULT (new_phi));
590 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
595 /* Function slpeel_update_phi_nodes_for_guard2
597 Input:
598 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
600 In the context of the overall structure, we have:
602 loop1_preheader_bb:
603 guard1 (goto loop1/merge1_bb)
604 loop1
605 loop1_exit_bb:
606 guard2 (goto merge1_bb/merge2_bb)
607 merge1_bb
608 LOOP-> loop2
609 loop2_exit_bb
610 merge2_bb
611 next_bb
613 For each name used outside the loop (i.e - for each name that has an exit
614 phi in next_bb) we create a new phi in:
615 1. merge2_bb (to account for the edge from guard_bb)
616 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
617 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
618 if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
621 static void
622 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
623 bool is_new_loop, basic_block *new_exit_bb)
625 gimple orig_phi, new_phi;
626 gimple update_phi, update_phi2;
627 tree guard_arg, loop_arg;
628 basic_block new_merge_bb = guard_edge->dest;
629 edge e = EDGE_SUCC (new_merge_bb, 0);
630 basic_block update_bb = e->dest;
631 edge new_exit_e;
632 tree orig_def, orig_def_new_name;
633 tree new_name, new_name2;
634 tree arg;
635 gimple_stmt_iterator gsi;
637 /* Create new bb between loop and new_merge_bb. */
638 *new_exit_bb = split_edge (single_exit (loop));
640 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
642 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
644 update_phi = gsi_stmt (gsi);
645 orig_phi = update_phi;
646 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
647 /* This loop-closed-phi actually doesn't represent a use
648 out of the loop - the phi arg is a constant. */
649 if (TREE_CODE (orig_def) != SSA_NAME)
650 continue;
651 orig_def_new_name = get_current_def (orig_def);
652 arg = NULL_TREE;
654 /** 1. Handle new-merge-point phis **/
656 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
657 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
658 new_merge_bb);
660 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
661 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
662 new_name = orig_def;
663 new_name2 = NULL_TREE;
664 if (orig_def_new_name)
666 new_name = orig_def_new_name;
667 /* Some variables have both loop-entry-phis and loop-exit-phis.
668 Such variables were given yet newer names by phis placed in
669 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
670 new_name2 = get_current_def (get_current_def (orig_name)). */
671 new_name2 = get_current_def (new_name);
674 if (is_new_loop)
676 guard_arg = orig_def;
677 loop_arg = new_name;
679 else
681 guard_arg = new_name;
682 loop_arg = orig_def;
684 if (new_name2)
685 guard_arg = new_name2;
687 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
688 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
690 /* 1.3. Update phi in successor block. */
691 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
692 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
693 update_phi2 = new_phi;
696 /** 2. Handle loop-closed-ssa-form phis **/
698 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
699 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
700 *new_exit_bb);
702 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
703 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
705 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
706 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
707 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
708 PHI_RESULT (new_phi));
711 /** 3. Handle loop-closed-ssa-form phis for first loop **/
713 /* 3.1. Find the relevant names that need an exit-phi in
714 GUARD_BB, i.e. names for which
715 slpeel_update_phi_nodes_for_guard1 had not already created a
716 phi node. This is the case for names that are used outside
717 the loop (and therefore need an exit phi) but are not updated
718 across loop iterations (and therefore don't have a
719 loop-header-phi).
721 slpeel_update_phi_nodes_for_guard1 is responsible for
722 creating loop-exit phis in GUARD_BB for names that have a
723 loop-header-phi. When such a phi is created we also record
724 the new name in its current definition. If this new name
725 exists, then guard_arg was set to this new name (see 1.2
726 above). Therefore, if guard_arg is not this new name, this
727 is an indication that an exit-phi in GUARD_BB was not yet
728 created, so we take care of it here. */
729 if (guard_arg == new_name2)
730 continue;
731 arg = guard_arg;
733 /* 3.2. Generate new phi node in GUARD_BB: */
734 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
735 guard_edge->src);
737 /* 3.3. GUARD_BB has one incoming edge: */
738 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
739 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
740 UNKNOWN_LOCATION);
742 /* 3.4. Update phi in successor of GUARD_BB: */
743 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
744 == guard_arg);
745 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
746 PHI_RESULT (new_phi));
751 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
752 that starts at zero, increases by one in each iteration, and whose limit is NITERS.
754 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
756 void
757 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
759 tree indx_before_incr, indx_after_incr;
760 gimple cond_stmt;
761 gimple orig_cond;
762 edge exit_edge = single_exit (loop);
763 gimple_stmt_iterator loop_cond_gsi;
764 gimple_stmt_iterator incr_gsi;
765 bool insert_after;
766 tree init = build_int_cst (TREE_TYPE (niters), 0);
767 tree step = build_int_cst (TREE_TYPE (niters), 1);
768 LOC loop_loc;
769 enum tree_code code;
771 orig_cond = get_loop_exit_condition (loop);
772 gcc_assert (orig_cond);
773 loop_cond_gsi = gsi_for_stmt (orig_cond);
775 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
776 create_iv (init, step, NULL_TREE, loop,
777 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
779 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
780 true, NULL_TREE, true,
781 GSI_SAME_STMT);
782 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
783 true, GSI_SAME_STMT);
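  /* If the exit edge is taken when the condition is true, the new exit test
     must trigger once NITERS iterations are done: index >= NITERS.
     Otherwise the condition keeps the loop running while index < NITERS.  */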
785 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
786 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
787 NULL_TREE);
789 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
791 /* Remove old loop exit test: */
792 gsi_remove (&loop_cond_gsi, true);
794 loop_loc = find_loop_location (loop);
795 if (dump_file && (dump_flags & TDF_DETAILS))
797 if (loop_loc != UNKNOWN_LOC)
798 fprintf (dump_file, "\nloop at %s:%d: ",
799 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
800 print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
803 loop->nb_iterations = niters;
807 /* Given LOOP, this function generates a new copy of it and puts it
808 on E, which is either the entry or exit edge of LOOP. */
810 struct loop *
811 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
813 struct loop *new_loop;
814 basic_block *new_bbs, *bbs;
815 bool at_exit;
816 bool was_imm_dom;
817 basic_block exit_dest;
818 gimple phi;
819 tree phi_arg;
820 edge exit, new_exit;
821 gimple_stmt_iterator gsi;
823 at_exit = (e == single_exit (loop));
824 if (!at_exit && e != loop_preheader_edge (loop))
825 return NULL;
827 bbs = get_loop_body (loop);
829 /* Check whether duplication is possible. */
830 if (!can_copy_bbs_p (bbs, loop->num_nodes))
832 free (bbs);
833 return NULL;
836 /* Generate new loop structure. */
837 new_loop = duplicate_loop (loop, loop_outer (loop));
838 if (!new_loop)
840 free (bbs);
841 return NULL;
844 exit_dest = single_exit (loop)->dest;
845 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
846 exit_dest) == loop->header ?
847 true : false);
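  /* Remember whether the exit destination was immediately dominated by the
     loop header; if the copy is added at the exit, the copy's header becomes
     its immediate dominator instead (see below).  */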
849 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
851 exit = single_exit (loop);
852 copy_bbs (bbs, loop->num_nodes, new_bbs,
853 &exit, 1, &new_exit, NULL,
854 e->src);
856 /* Duplicate the phi args at the exit bbs so that they also come
857 from the exit of the duplicated loop. */
858 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
860 phi = gsi_stmt (gsi);
861 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
862 if (phi_arg)
864 edge new_loop_exit_edge;
865 source_location locus;
867 locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
868 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
869 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
870 else
871 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
873 add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
877 if (at_exit) /* Add the loop copy at exit. */
879 redirect_edge_and_branch_force (e, new_loop->header);
880 PENDING_STMT (e) = NULL;
881 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
882 if (was_imm_dom)
883 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
885 else /* Add the copy at entry. */
887 edge new_exit_e;
888 edge entry_e = loop_preheader_edge (loop);
889 basic_block preheader = entry_e->src;
891 if (!flow_bb_inside_loop_p (new_loop,
892 EDGE_SUCC (new_loop->header, 0)->dest))
893 new_exit_e = EDGE_SUCC (new_loop->header, 0);
894 else
895 new_exit_e = EDGE_SUCC (new_loop->header, 1);
897 redirect_edge_and_branch_force (new_exit_e, loop->header);
898 PENDING_STMT (new_exit_e) = NULL;
899 set_immediate_dominator (CDI_DOMINATORS, loop->header,
900 new_exit_e->src);
902 /* We have to add phi args to the loop->header here as coming
903 from new_exit_e edge. */
904 for (gsi = gsi_start_phis (loop->header);
905 !gsi_end_p (gsi);
906 gsi_next (&gsi))
908 phi = gsi_stmt (gsi);
909 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
910 if (phi_arg)
911 add_phi_arg (phi, phi_arg, new_exit_e,
912 gimple_phi_arg_location_from_edge (phi, entry_e));
915 redirect_edge_and_branch_force (entry_e, new_loop->header);
916 PENDING_STMT (entry_e) = NULL;
917 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
920 free (new_bbs);
921 free (bbs);
923 return new_loop;
927 /* Given the condition COND, put it as the last statement of GUARD_BB;
928 EXIT_BB is the basic block reached when the loop is skipped (assumed
929 to be the single exit of the guarded loop).
930 Returns the skip edge; new stmts are inserted from COND_EXPR_STMT_LIST. */
932 static edge
933 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
934 gimple_seq cond_expr_stmt_list,
935 basic_block exit_bb, basic_block dom_bb)
937 gimple_stmt_iterator gsi;
938 edge new_e, enter_e;
939 gimple cond_stmt;
940 gimple_seq gimplify_stmt_list = NULL;
942 enter_e = EDGE_SUCC (guard_bb, 0);
943 enter_e->flags &= ~EDGE_FALLTHRU;
944 enter_e->flags |= EDGE_FALSE_VALUE;
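  /* The existing fall-through edge into the loop becomes the FALSE edge of
     the guard; the TRUE edge created below skips the loop and jumps to
     EXIT_BB.  */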
945 gsi = gsi_last_bb (guard_bb);
947 cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
948 if (gimplify_stmt_list)
949 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
950 cond_stmt = gimple_build_cond (NE_EXPR,
951 cond, build_int_cst (TREE_TYPE (cond), 0),
952 NULL_TREE, NULL_TREE);
953 if (cond_expr_stmt_list)
954 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
956 gsi = gsi_last_bb (guard_bb);
957 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
959 /* Add new edge to connect guard block to the merge/loop-exit block. */
960 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
961 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
962 return new_e;
966 /* This function verifies that the following restrictions apply to LOOP:
967 (1) it is innermost
968 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
969 (3) it is single entry, single exit
970 (4) its exit condition is the last stmt in the header
971 (5) E is the entry/exit edge of LOOP.
974 bool
975 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
977 edge exit_e = single_exit (loop);
978 edge entry_e = loop_preheader_edge (loop);
979 gimple orig_cond = get_loop_exit_condition (loop);
980 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
982 if (need_ssa_update_p (cfun))
983 return false;
985 if (loop->inner
986 /* All loops have an outer scope; the only case loop->outer is NULL is for
987 the function itself. */
988 || !loop_outer (loop)
989 || loop->num_nodes != 2
990 || !empty_block_p (loop->latch)
991 || !single_exit (loop)
992 /* Verify that new loop exit condition can be trivially modified. */
993 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
994 || (e != exit_e && e != entry_e))
995 return false;
997 return true;
1000 #ifdef ENABLE_CHECKING
1001 static void
1002 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1003 struct loop *second_loop)
1005 basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1006 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1007 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1009 /* A guard that controls whether the second_loop is to be executed or skipped
1010 is placed in first_loop->exit. first_loop->exit therefore has two
1011 successors - one is the preheader of second_loop, and the other is a bb
1012 after second_loop.
1014 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1016 /* 1. Verify that one of the successors of first_loop->exit is the preheader
1017 of second_loop. */
1019 /* The preheader of new_loop is expected to have two predecessors:
1020 first_loop->exit and the block that precedes first_loop. */
1022 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1023 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1024 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1025 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1026 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1028 /* Verify that the other successor of first_loop->exit is after the
1029 second_loop. */
1030 /* TODO */
1032 #endif
1034 /* If the run-time cost model check determines that vectorization is
1035 not profitable and hence a scalar loop should be generated, then set
1036 FIRST_NITERS to the full number of scalar iterations. This will allow all the
1037 iterations to be executed in the prologue peeled scalar loop. */
1039 static void
1040 set_prologue_iterations (basic_block bb_before_first_loop,
1041 tree first_niters,
1042 struct loop *loop,
1043 unsigned int th)
1045 edge e;
1046 basic_block cond_bb, then_bb;
1047 tree var, prologue_after_cost_adjust_name;
1048 gimple_stmt_iterator gsi;
1049 gimple newphi;
1050 edge e_true, e_false, e_fallthru;
1051 gimple cond_stmt;
1052 gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1053 tree cost_pre_condition = NULL_TREE;
1054 tree scalar_loop_iters =
1055 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1057 e = single_pred_edge (bb_before_first_loop);
1058 cond_bb = split_edge(e);
1060 e = single_pred_edge (bb_before_first_loop);
1061 then_bb = split_edge(e);
1062 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1064 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1065 EDGE_FALSE_VALUE);
1066 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1068 e_true = EDGE_PRED (then_bb, 0);
1069 e_true->flags &= ~EDGE_FALLTHRU;
1070 e_true->flags |= EDGE_TRUE_VALUE;
1072 e_fallthru = EDGE_SUCC (then_bb, 0);
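  /* At this point COND_BB branches either to THEN_BB (TRUE edge) and from
     there falls through into BB_BEFORE_FIRST_LOOP, or directly to
     BB_BEFORE_FIRST_LOOP (FALSE edge).  COND_BB gets the
     scalar_loop_iters <= TH test, THEN_BB computes the adjusted iteration
     count, and the phi created at the end merges it with FIRST_NITERS.  */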
1074 cost_pre_condition =
1075 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1076 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1077 cost_pre_condition =
1078 force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1079 true, NULL_TREE);
1080 cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1081 build_int_cst (TREE_TYPE (cost_pre_condition),
1082 0), NULL_TREE, NULL_TREE);
1084 gsi = gsi_last_bb (cond_bb);
1085 if (gimplify_stmt_list)
1086 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1088 gsi = gsi_last_bb (cond_bb);
1089 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1091 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1092 "prologue_after_cost_adjust");
1093 add_referenced_var (var);
1094 prologue_after_cost_adjust_name =
1095 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1097 gsi = gsi_last_bb (then_bb);
1098 if (stmts)
1099 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1101 newphi = create_phi_node (var, bb_before_first_loop);
1102 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1103 UNKNOWN_LOCATION);
1104 add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
1106 first_niters = PHI_RESULT (newphi);
1110 /* Function slpeel_tree_peel_loop_to_edge.
1112 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1113 that is placed on the entry (exit) edge E of LOOP. After this transformation
1114 we have two loops one after the other - first-loop iterates FIRST_NITERS
1115 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1116 If the cost model indicates that it is profitable to emit a scalar
1117 loop instead of the vector one, then the prolog (epilog) loop will iterate
1118 for the entire unchanged scalar iterations of the loop.
1120 Input:
1121 - LOOP: the loop to be peeled.
1122 - E: the exit or entry edge of LOOP.
1123 If it is the entry edge, we peel the first iterations of LOOP. In this
1124 case first-loop is LOOP, and second-loop is the newly created loop.
1125 If it is the exit edge, we peel the last iterations of LOOP. In this
1126 case, first-loop is the newly created loop, and second-loop is LOOP.
1127 - NITERS: the number of iterations that LOOP iterates.
1128 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1129 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
1130 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1131 is false, the caller of this function may want to take care of this
1132 (this can be useful if we don't want new stmts added to first-loop).
1133 - TH: cost model profitability threshold of iterations for vectorization.
1134 - CHECK_PROFITABILITY: specifies whether the cost model check has not yet
1135 occurred during versioning and hence needs to occur during
1136 prologue generation, or whether it has not occurred during
1137 prologue generation and hence needs to occur during
1138 epilogue generation.
1141 Output:
1142 The function returns a pointer to the new loop-copy, or NULL if it failed
1143 to perform the transformation.
1145 The function generates two if-then-else guards: one before the first loop,
1146 and the other before the second loop:
1147 The first guard is:
1148 if (FIRST_NITERS == 0) then skip the first loop,
1149 and go directly to the second loop.
1150 The second guard is:
1151 if (FIRST_NITERS == NITERS) then skip the second loop.
1153 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1154 then the generated condition is combined with COND_EXPR and the
1155 statements in COND_EXPR_STMT_LIST are emitted together with it.
1157 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1158 FORNOW the resulting code will not be in loop-closed-ssa form.
1161 static struct loop*
1162 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1163 edge e, tree first_niters,
1164 tree niters, bool update_first_loop_count,
1165 unsigned int th, bool check_profitability,
1166 tree cond_expr, gimple_seq cond_expr_stmt_list)
1168 struct loop *new_loop = NULL, *first_loop, *second_loop;
1169 edge skip_e;
1170 tree pre_condition = NULL_TREE;
1171 bitmap definitions;
1172 basic_block bb_before_second_loop, bb_after_second_loop;
1173 basic_block bb_before_first_loop;
1174 basic_block bb_between_loops;
1175 basic_block new_exit_bb;
1176 edge exit_e = single_exit (loop);
1177 LOC loop_loc;
1178 tree cost_pre_condition = NULL_TREE;
1180 if (!slpeel_can_duplicate_loop_p (loop, e))
1181 return NULL;
1183 /* We have to initialize cfg_hooks. Then, when calling
1184 cfg_hooks->split_edge, the function tree_split_edge
1185 is actually called and, when calling cfg_hooks->duplicate_block,
1186 the function tree_duplicate_bb is called. */
1187 gimple_register_cfg_hooks ();
1190 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1191 Resulting CFG would be:
1193 first_loop:
1194 do {
1195 } while ...
1197 second_loop:
1198 do {
1199 } while ...
1201 orig_exit_bb:
1204 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1206 loop_loc = find_loop_location (loop);
1207 if (dump_file && (dump_flags & TDF_DETAILS))
1209 if (loop_loc != UNKNOWN_LOC)
1210 fprintf (dump_file, "\n%s:%d: note: ",
1211 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1212 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1214 return NULL;
1217 if (MAY_HAVE_DEBUG_STMTS)
1219 gcc_assert (!adjust_vec);
1220 adjust_vec = VEC_alloc (adjust_info, stack, 32);
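  /* Debug-stmt adjustments requested by adjust_debug_stmts are queued on
     adjust_vec and applied in LIFO order by adjust_vec_debug_stmts at the
     end of this function.  */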
1223 if (e == exit_e)
1225 /* NEW_LOOP was placed after LOOP. */
1226 first_loop = loop;
1227 second_loop = new_loop;
1229 else
1231 /* NEW_LOOP was placed before LOOP. */
1232 first_loop = new_loop;
1233 second_loop = loop;
1236 definitions = ssa_names_to_replace ();
1237 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1238 rename_variables_in_loop (new_loop);
1241 /* 2. Add the guard code in one of the following ways:
1243 2.a Add the guard that controls whether the first loop is executed.
1244 This occurs when this function is invoked for prologue or epilogue
1245 generation and when the cost model check can be done at compile time.
1247 Resulting CFG would be:
1249 bb_before_first_loop:
1250 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1251 GOTO first-loop
1253 first_loop:
1254 do {
1255 } while ...
1257 bb_before_second_loop:
1259 second_loop:
1260 do {
1261 } while ...
1263 orig_exit_bb:
1265 2.b Add the cost model check that allows the prologue
1266 to iterate for the entire unchanged scalar
1267 iterations of the loop in the event that the cost
1268 model indicates that the scalar loop is more
1269 profitable than the vector one. This occurs when
1270 this function is invoked for prologue generation
1271 and the cost model check needs to be done at run
1272 time.
1274 Resulting CFG after prologue peeling would be:
1276 if (scalar_loop_iterations <= th)
1277 FIRST_NITERS = scalar_loop_iterations
1279 bb_before_first_loop:
1280 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1281 GOTO first-loop
1283 first_loop:
1284 do {
1285 } while ...
1287 bb_before_second_loop:
1289 second_loop:
1290 do {
1291 } while ...
1293 orig_exit_bb:
1295 2.c Add the cost model check that allows the epilogue
1296 to iterate for the entire unchanged scalar
1297 iterations of the loop in the event that the cost
1298 model indicates that the scalar loop is more
1299 profitable than the vector one. This occurs when
1300 this function is invoked for epilogue generation
1301 and the cost model check needs to be done at run
1302 time. This check is combined with any pre-existing
1303 check in COND_EXPR to avoid versioning.
1305 Resulting CFG after prologue peeling would be:
1307 bb_before_first_loop:
1308 if ((scalar_loop_iterations <= th)
1310 FIRST_NITERS == 0) GOTO bb_before_second_loop
1311 GOTO first-loop
1313 first_loop:
1314 do {
1315 } while ...
1317 bb_before_second_loop:
1319 second_loop:
1320 do {
1321 } while ...
1323 orig_exit_bb:
1326 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1327 bb_before_second_loop = split_edge (single_exit (first_loop));
1329 /* Epilogue peeling. */
1330 if (!update_first_loop_count)
1332 pre_condition =
1333 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1334 build_int_cst (TREE_TYPE (first_niters), 0));
1335 if (check_profitability)
1337 tree scalar_loop_iters
1338 = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1339 (loop_vec_info_for_loop (loop)));
1340 cost_pre_condition =
1341 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1342 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1344 pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1345 cost_pre_condition, pre_condition);
1347 if (cond_expr)
1349 pre_condition =
1350 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1351 pre_condition,
1352 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1353 cond_expr));
1357 /* Prologue peeling. */
1358 else
1360 if (check_profitability)
1361 set_prologue_iterations (bb_before_first_loop, first_niters,
1362 loop, th);
1364 pre_condition =
1365 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1366 build_int_cst (TREE_TYPE (first_niters), 0));
1369 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1370 cond_expr_stmt_list,
1371 bb_before_second_loop, bb_before_first_loop);
1372 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1373 first_loop == new_loop,
1374 &new_exit_bb, &definitions);
1377 /* 3. Add the guard that controls whether the second loop is executed.
1378 Resulting CFG would be:
1380 bb_before_first_loop:
1381 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1382 GOTO first-loop
1384 first_loop:
1385 do {
1386 } while ...
1388 bb_between_loops:
1389 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1390 GOTO bb_before_second_loop
1392 bb_before_second_loop:
1394 second_loop:
1395 do {
1396 } while ...
1398 bb_after_second_loop:
1400 orig_exit_bb:
1403 bb_between_loops = new_exit_bb;
1404 bb_after_second_loop = split_edge (single_exit (second_loop));
1406 pre_condition =
1407 fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1408 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1409 bb_after_second_loop, bb_before_first_loop);
1410 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1411 second_loop == new_loop, &new_exit_bb);
1413 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1415 if (update_first_loop_count)
1416 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1418 adjust_vec_debug_stmts ();
1420 BITMAP_FREE (definitions);
1421 delete_update_ssa ();
1423 return new_loop;
1426 /* Function vect_get_loop_location.
1428 Extract the location of the loop in the source code.
1429 If the loop is not well formed for vectorization, an estimated
1430 location is calculated.
1431 Return the loop location on success and UNKNOWN_LOC otherwise. */
1433 LOC
1434 find_loop_location (struct loop *loop)
1436 gimple stmt = NULL;
1437 basic_block bb;
1438 gimple_stmt_iterator si;
1440 if (!loop)
1441 return UNKNOWN_LOC;
1443 stmt = get_loop_exit_condition (loop);
1445 if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1446 return gimple_location (stmt);
1448 /* If we got here, the loop is probably not "well formed";
1449 try to estimate the loop location. */
1451 if (!loop->header)
1452 return UNKNOWN_LOC;
1454 bb = loop->header;
1456 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1458 stmt = gsi_stmt (si);
1459 if (gimple_location (stmt) != UNKNOWN_LOC)
1460 return gimple_location (stmt);
1463 return UNKNOWN_LOC;
1467 /* This function builds ni_name = number of iterations the loop executes,
1468 emitted on the loop preheader. If SEQ is given, the stmts are instead
1469 appended there. */
1471 static tree
1472 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1474 tree ni_name, var;
1475 gimple_seq stmts = NULL;
1476 edge pe;
1477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1478 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1480 var = create_tmp_var (TREE_TYPE (ni), "niters");
1481 add_referenced_var (var);
1482 ni_name = force_gimple_operand (ni, &stmts, false, var);
1484 pe = loop_preheader_edge (loop);
1485 if (stmts)
1487 if (seq)
1488 gimple_seq_add_seq (&seq, stmts);
1489 else
1491 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1492 gcc_assert (!new_bb);
1496 return ni_name;
1500 /* This function generates the following statements:
1502 ni_name = number of iterations loop executes
1503 ratio = ni_name / vf
1504 ratio_mult_vf_name = ratio * vf
1506 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1507 if that is non-NULL. */
1509 static void
1510 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1511 tree *ni_name_ptr,
1512 tree *ratio_mult_vf_name_ptr,
1513 tree *ratio_name_ptr,
1514 gimple_seq cond_expr_stmt_list)
1517 edge pe;
1518 basic_block new_bb;
1519 gimple_seq stmts;
1520 tree ni_name;
1521 tree var;
1522 tree ratio_name;
1523 tree ratio_mult_vf_name;
1524 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1525 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1526 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1527 tree log_vf;
1529 pe = loop_preheader_edge (loop);
1531 /* Generate temporary variable that contains
1532 number of iterations loop executes. */
1534 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1535 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
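  /* For example, with vf == 4 (log_vf == 2) and ni == 10 the code below
     computes ratio = 10 >> 2 = 2 and ratio_mult_vf = 2 << 2 = 8: the
     vectorized loop executes 2 iterations covering 8 scalar iterations,
     and the epilogue loop handles the remaining 2.  */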
1537 /* Create: ratio = ni >> log2(vf) */
1539 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1540 if (!is_gimple_val (ratio_name))
1542 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1543 add_referenced_var (var);
1545 stmts = NULL;
1546 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1547 if (cond_expr_stmt_list)
1548 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1549 else
1551 pe = loop_preheader_edge (loop);
1552 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1553 gcc_assert (!new_bb);
1557 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1559 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1560 ratio_name, log_vf);
1561 if (!is_gimple_val (ratio_mult_vf_name))
1563 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1564 add_referenced_var (var);
1566 stmts = NULL;
1567 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1568 true, var);
1569 if (cond_expr_stmt_list)
1570 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1571 else
1573 pe = loop_preheader_edge (loop);
1574 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1575 gcc_assert (!new_bb);
1579 *ni_name_ptr = ni_name;
1580 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1581 *ratio_name_ptr = ratio_name;
1583 return;
1586 /* Function vect_can_advance_ivs_p
1588 In case the number of iterations that LOOP iterates is unknown at compile
1589 time, an epilog loop will be generated, and the loop induction variables
1590 (IVs) will be "advanced" to the value they are supposed to take just before
1591 the epilog loop. Here we check that the access function of the loop IVs
1592 and the expression that represents the loop bound are simple enough.
1593 These restrictions will be relaxed in the future. */
1595 bool
1596 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1598 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1599 basic_block bb = loop->header;
1600 gimple phi;
1601 gimple_stmt_iterator gsi;
1603 /* Analyze phi functions of the loop header. */
1605 if (vect_print_dump_info (REPORT_DETAILS))
1606 fprintf (vect_dump, "vect_can_advance_ivs_p:");
1608 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1610 tree access_fn = NULL;
1611 tree evolution_part;
1613 phi = gsi_stmt (gsi);
1614 if (vect_print_dump_info (REPORT_DETAILS))
1616 fprintf (vect_dump, "Analyze phi: ");
1617 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1620 /* Skip virtual phi's. The data dependences that are associated with
1621 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1623 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1625 if (vect_print_dump_info (REPORT_DETAILS))
1626 fprintf (vect_dump, "virtual phi. skip.");
1627 continue;
1630 /* Skip reduction phis. */
1632 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1634 if (vect_print_dump_info (REPORT_DETAILS))
1635 fprintf (vect_dump, "reduc phi. skip.");
1636 continue;
1639 /* Analyze the evolution function. */
1641 access_fn = instantiate_parameters
1642 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1644 if (!access_fn)
1646 if (vect_print_dump_info (REPORT_DETAILS))
1647 fprintf (vect_dump, "No Access function.");
1648 return false;
1651 if (vect_print_dump_info (REPORT_DETAILS))
1653 fprintf (vect_dump, "Access function of PHI: ");
1654 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1657 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1659 if (evolution_part == NULL_TREE)
1661 if (vect_print_dump_info (REPORT_DETAILS))
1662 fprintf (vect_dump, "No evolution.");
1663 return false;
1666 /* FORNOW: We do not transform initial conditions of IVs
1667 which evolution functions are a polynomial of degree >= 2. */
1669 if (tree_is_chrec (evolution_part))
1670 return false;
1673 return true;
1677 /* Function vect_update_ivs_after_vectorizer.
1679 "Advance" the induction variables of LOOP to the value they should take
1680 after the execution of LOOP. This is currently necessary because the
1681 vectorizer does not handle induction variables that are used after the
1682 loop. Such a situation occurs when the last iterations of LOOP are
1683 peeled, because:
1684 1. We introduced new uses after LOOP for IVs that were not originally used
1685 after LOOP: the IVs of LOOP are now used by an epilog loop.
1686 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1687 times, whereas the loop IVs should be bumped N times.
1689 Input:
1690 - LOOP - a loop that is going to be vectorized. The last few iterations
1691 of LOOP were peeled.
1692 - NITERS - the number of iterations that LOOP executes (before it is
1693 vectorized). i.e, the number of times the ivs should be bumped.
1694 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1695 coming out from LOOP on which there are uses of the LOOP ivs
1696 (this is the path from LOOP->exit to epilog_loop->preheader).
1698 The new definitions of the ivs are placed in LOOP->exit.
1699 The phi args associated with the edge UPDATE_E in the bb
1700 UPDATE_E->dest are updated accordingly.
1702 Assumption 1: Like the rest of the vectorizer, this function assumes
1703 a single loop exit that has a single predecessor.
1705 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1706 organized in the same order.
1708 Assumption 3: The access function of the ivs is simple enough (see
1709 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1711 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1712 coming out of LOOP on which the ivs of LOOP are used (this is the path
1713 that leads to the epilog loop; other paths skip the epilog loop). This
1714 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1715 needs to have its phis updated.
1718 static void
1719 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1720 edge update_e)
1722 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1723 basic_block exit_bb = single_exit (loop)->dest;
1724 gimple phi, phi1;
1725 gimple_stmt_iterator gsi, gsi1;
1726 basic_block update_bb = update_e->dest;
1728 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1730 /* Make sure there exists a single-predecessor exit bb: */
1731 gcc_assert (single_pred_p (exit_bb));
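  /* Walk the loop-header phis and the phis in UPDATE_BB in lockstep; they
     are organized in the same order (see Assumption 2 above).  */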
1733 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1734 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1735 gsi_next (&gsi), gsi_next (&gsi1))
1737 tree access_fn = NULL;
1738 tree evolution_part;
1739 tree init_expr;
1740 tree step_expr, off;
1741 tree type;
1742 tree var, ni, ni_name;
1743 gimple_stmt_iterator last_gsi;
1745 phi = gsi_stmt (gsi);
1746 phi1 = gsi_stmt (gsi1);
1747 if (vect_print_dump_info (REPORT_DETAILS))
1749 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1750 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1753 /* Skip virtual phi's. */
1754 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1756 if (vect_print_dump_info (REPORT_DETAILS))
1757 fprintf (vect_dump, "virtual phi. skip.");
1758 continue;
1761 /* Skip reduction phis. */
1762 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1764 if (vect_print_dump_info (REPORT_DETAILS))
1765 fprintf (vect_dump, "reduc phi. skip.");
1766 continue;
1769 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1770 gcc_assert (access_fn);
1771 /* We can end up with an access_fn like
1772 (short int) {(short unsigned int) i_49, +, 1}_1
1773 for further analysis we need to strip the outer cast but we
1774 need to preserve the original type. */
1775 type = TREE_TYPE (access_fn);
1776 STRIP_NOPS (access_fn);
1777 evolution_part =
1778 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1779 gcc_assert (evolution_part != NULL_TREE);
1781 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1782 of degree >= 2 or exponential. */
1783 gcc_assert (!tree_is_chrec (evolution_part));
1785 step_expr = evolution_part;
1786 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1787 loop->num));
1788 init_expr = fold_convert (type, init_expr);
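      /* Compute the value the IV takes after NITERS scalar iterations:
         ni = init_expr + niters * step_expr.  */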
1790 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1791 fold_convert (TREE_TYPE (step_expr), niters),
1792 step_expr);
1793 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1794 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1795 init_expr,
1796 fold_convert (sizetype, off));
1797 else
1798 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1799 init_expr,
1800 fold_convert (TREE_TYPE (init_expr), off));
1802 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1803 add_referenced_var (var);
1805 last_gsi = gsi_last_bb (exit_bb);
1806 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1807 true, GSI_SAME_STMT);
1809 /* Fix phi expressions in the successor bb. */
1810 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1814 /* Return the more conservative threshold between the
1815 min_profitable_iters returned by the cost model and the
1816 user-specified threshold, if provided. */
1818 static unsigned int
1819 conservative_cost_threshold (loop_vec_info loop_vinfo,
1820 int min_profitable_iters)
1822 unsigned int th;
1823 int min_scalar_loop_bound;
1825 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1826 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1828 /* Use the cost model only if it is more conservative than the
1829 user-specified threshold. */
1830 th = (unsigned) min_scalar_loop_bound;
1831 if (min_profitable_iters
1832 && (!min_scalar_loop_bound
1833 || min_profitable_iters > min_scalar_loop_bound))
1834 th = (unsigned) min_profitable_iters;
1836 if (th && vect_print_dump_info (REPORT_COST))
1837 fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1839 return th;
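/* Illustrative sketch (hypothetical helper, plain C): the selection above
   is essentially "keep the larger, i.e. more conservative, of the two
   bounds", where 0 means that a source imposes no bound.  */
static inline unsigned int
example_conservative_threshold (unsigned int user_bound,
                                unsigned int cost_model_bound)
{
  /* The larger bound requires more iterations before the vectorized
     path is taken, so it is the more conservative choice.  */
  return cost_model_bound > user_bound ? cost_model_bound : user_bound;
}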
1842 /* Function vect_do_peeling_for_loop_bound
1844 Peel the last iterations of the loop represented by LOOP_VINFO.
1845 The peeled iterations form a new epilog loop. Given that the loop now
1846 iterates NITERS times, the new epilog loop iterates
1847 NITERS % VECTORIZATION_FACTOR times.
1849 The original loop will later be made to iterate
1850 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1852 COND_EXPR and COND_EXPR_STMT_LIST are combined with a newly generated
1853 test. */
1855 void
1856 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1857 tree cond_expr, gimple_seq cond_expr_stmt_list)
1859 tree ni_name, ratio_mult_vf_name;
1860 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1861 struct loop *new_loop;
1862 edge update_e;
1863 basic_block preheader;
1864 int loop_num;
1865 bool check_profitability = false;
1866 unsigned int th = 0;
1867 int min_profitable_iters;
1869 if (vect_print_dump_info (REPORT_DETAILS))
1870 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1872 initialize_original_copy_tables ();
1874 /* Generate the following variables on the preheader of the original loop:
1876 ni_name = number of iterations the original loop executes
1877 ratio = ni_name / vf
1878 ratio_mult_vf_name = ratio * vf */
1879 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1880 &ratio_mult_vf_name, ratio,
1881 cond_expr_stmt_list);
1883 loop_num = loop->num;
1885 /* If the cost model check was not done during versioning or
1886 peeling for alignment, do it here. */
1887 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1888 && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1889 && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1890 && !cond_expr)
1892 check_profitability = true;
1894 /* Get profitability threshold for vectorized loop. */
1895 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1897 th = conservative_cost_threshold (loop_vinfo,
1898 min_profitable_iters);
1901 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1902 ratio_mult_vf_name, ni_name, false,
1903 th, check_profitability,
1904 cond_expr, cond_expr_stmt_list);
1905 gcc_assert (new_loop);
1906 gcc_assert (loop_num == loop->num);
1907 #ifdef ENABLE_CHECKING
1908 slpeel_verify_cfg_after_peeling (loop, new_loop);
1909 #endif
1911 /* A guard that controls whether the new_loop is to be executed or skipped
1912 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1913 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1914 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1915 is on the path where the LOOP IVs are used and need to be updated. */
1917 preheader = loop_preheader_edge (new_loop)->src;
1918 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1919 update_e = EDGE_PRED (preheader, 0);
1920 else
1921 update_e = EDGE_PRED (preheader, 1);
1923 /* Update IVs of original loop as if they were advanced
1924 by ratio_mult_vf_name steps. */
1925 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1927 /* After peeling we have to reset scalar evolution analyzer. */
1928 scev_reset ();
1930 free_original_copy_tables ();
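/* Illustrative sketch (hypothetical helper, plain C) of the iteration
   split produced above: with NITERS = 103 and a vectorization factor of
   4, the vectorized loop runs 103 / 4 = 25 times (RATIO) and the scalar
   epilog loop runs 103 % 4 = 3 times.  */
static inline void
example_peeling_for_loop_bound (unsigned long niters, unsigned long vf,
                                unsigned long *ratio,
                                unsigned long *epilog_niters)
{
  *ratio = niters / vf;          /* iterations of the vectorized loop */
  *epilog_niters = niters % vf;  /* iterations of the scalar epilog   */
}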
1934 /* Function vect_gen_niters_for_prolog_loop
1936 Set the number of iterations for the loop represented by LOOP_VINFO
1937 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1938 and the misalignment of DR - the data reference recorded in
1939 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1940 this loop, the data reference DR will refer to an aligned location.
1942 The following computation is generated:
1944 If the misalignment of DR is known at compile time:
1945 addr_mis = DR_MISALIGNMENT (dr);
1946 Else, compute address misalignment in bytes:
1947 addr_mis = addr & (vectype_size - 1)
1949 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1951 (elem_size = element type size; an element is the scalar element whose type
1952 is the inner type of the vectype)
1954 When the step of the data-ref in the loop is not 1 (as in interleaved data
1955 and SLP), the number of iterations of the prolog must be divided by the step
1956 (which is equal to the size of the interleaved group).
1958 The above formulas assume that VF == number of elements in the vector. This
1959 may not hold when there are multiple types in the loop.
1960 In this case, for some data-references in the loop the VF does not represent
1961 the number of elements that fit in the vector. Therefore, instead of VF we
1962 use TYPE_VECTOR_SUBPARTS. */
1964 static tree
1965 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
1966 tree *wide_prolog_niters)
1968 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1969 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1970 tree var;
1971 gimple_seq stmts;
1972 tree iters, iters_name;
1973 edge pe;
1974 basic_block new_bb;
1975 gimple dr_stmt = DR_STMT (dr);
1976 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1977 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1978 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1979 tree niters_type = TREE_TYPE (loop_niters);
1980 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1982 pe = loop_preheader_edge (loop);
1984 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1986 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1988 if (vect_print_dump_info (REPORT_DETAILS))
1989 fprintf (vect_dump, "known peeling = %d.", npeel);
1991 iters = build_int_cst (niters_type, npeel);
1993 else
1995 gimple_seq new_stmts = NULL;
1996 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1997 &new_stmts, NULL_TREE, loop);
1998 tree ptr_type = TREE_TYPE (start_addr);
1999 tree size = TYPE_SIZE (ptr_type);
2000 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2001 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2002 tree elem_size_log =
2003 build_int_cst (type, exact_log2 (vectype_align/nelements));
2004 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2005 tree nelements_tree = build_int_cst (type, nelements);
2006 tree byte_misalign;
2007 tree elem_misalign;
2009 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2010 gcc_assert (!new_bb);
2012 /* Create: byte_misalign = addr & (vectype_size - 1) */
2013 byte_misalign =
2014 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
2015 vectype_size_minus_1);
2017 /* Create: elem_misalign = byte_misalign / element_size */
2018 elem_misalign =
2019 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2021 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
2022 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2023 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2024 iters = fold_convert (niters_type, iters);
2027 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2028 /* If the loop bound is known at compile time we already verified that it is
2029 greater than vf; since the misalignment ('iters') is at most vf, there's
2030 no need to generate the MIN_EXPR in this case. */
2031 if (TREE_CODE (loop_niters) != INTEGER_CST)
2032 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2034 if (vect_print_dump_info (REPORT_DETAILS))
2036 fprintf (vect_dump, "niters for prolog loop: ");
2037 print_generic_expr (vect_dump, iters, TDF_SLIM);
2040 var = create_tmp_var (niters_type, "prolog_loop_niters");
2041 add_referenced_var (var);
2042 stmts = NULL;
2043 iters_name = force_gimple_operand (iters, &stmts, false, var);
2044 if (types_compatible_p (sizetype, niters_type))
2045 *wide_prolog_niters = iters_name;
2046 else
2048 gimple_seq seq = NULL;
2049 tree wide_iters = fold_convert (sizetype, iters);
2050 var = create_tmp_var (sizetype, "prolog_loop_niters");
2051 add_referenced_var (var);
2052 *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2053 var);
2054 if (seq)
2055 gimple_seq_add_seq (&stmts, seq);
2058 /* Insert stmt on loop preheader edge. */
2059 if (stmts)
2061 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2062 gcc_assert (!new_bb);
2065 return iters_name;
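/* Illustrative sketch (hypothetical helper, plain C) of the run-time
   computation generated above, for the common case of a unit step and a
   power-of-two number of elements per vector.  */
static inline unsigned long
example_prolog_niters (unsigned long addr, unsigned long vectype_size,
                       unsigned long nelements, unsigned long loop_niters)
{
  unsigned long elem_size = vectype_size / nelements;
  unsigned long byte_misalign = addr & (vectype_size - 1);
  unsigned long elem_misalign = byte_misalign / elem_size;
  /* Number of scalar iterations needed to reach an aligned address;
     the mask keeps the result in [0, nelements - 1].  */
  unsigned long iters = (nelements - elem_misalign) & (nelements - 1);
  /* Never peel more iterations than the loop actually executes.  */
  return iters < loop_niters ? iters : loop_niters;
}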
2069 /* Function vect_update_init_of_dr
2071 NITERS iterations were peeled from LOOP. DR represents a data reference
2072 in LOOP. This function updates the information recorded in DR to
2073 account for the fact that the first NITERS iterations had already been
2074 executed. Specifically, it updates the OFFSET field of DR. */
2076 static void
2077 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2079 tree offset = DR_OFFSET (dr);
2081 niters = fold_build2 (MULT_EXPR, sizetype,
2082 fold_convert (sizetype, niters),
2083 fold_convert (sizetype, DR_STEP (dr)));
2084 offset = fold_build2 (PLUS_EXPR, sizetype,
2085 fold_convert (sizetype, offset), niters);
2086 DR_OFFSET (dr) = offset;
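/* Illustrative sketch (hypothetical, plain C): after NITERS iterations
   have been peeled, the access starts NITERS * DR_STEP bytes further
   along, so the recorded offset is bumped by that amount.  */
static inline unsigned long
example_updated_dr_offset (unsigned long offset, unsigned long niters,
                           unsigned long step)
{
  return offset + niters * step;
}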
2090 /* Function vect_update_inits_of_drs
2092 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2093 This function updates the information recorded for the data references in
2094 the loop to account for the fact that the first NITERS iterations had
2095 already been executed. Specifically, it updates the initial_condition of
2096 the access_function of all the data_references in the loop. */
2098 static void
2099 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2101 unsigned int i;
2102 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2103 struct data_reference *dr;
2105 if (vect_print_dump_info (REPORT_DETAILS))
2106 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2108 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2109 vect_update_init_of_dr (dr, niters);
2113 /* Function vect_do_peeling_for_alignment
2115 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2116 'niters' is set to the misalignment of one of the data references in the
2117 loop, thereby forcing it to refer to an aligned location at the beginning
2118 of the execution of this loop. The data reference for which we are
2119 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2121 void
2122 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2124 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2125 tree niters_of_prolog_loop, ni_name;
2126 tree n_iters;
2127 tree wide_prolog_niters;
2128 struct loop *new_loop;
2129 unsigned int th = 0;
2130 int min_profitable_iters;
2132 if (vect_print_dump_info (REPORT_DETAILS))
2133 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2135 initialize_original_copy_tables ();
2137 ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2138 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
2139 &wide_prolog_niters);
2142 /* Get profitability threshold for vectorized loop. */
2143 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2144 th = conservative_cost_threshold (loop_vinfo,
2145 min_profitable_iters);
2147 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
2148 new_loop =
2149 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2150 niters_of_prolog_loop, ni_name, true,
2151 th, true, NULL_TREE, NULL);
2153 gcc_assert (new_loop);
2154 #ifdef ENABLE_CHECKING
2155 slpeel_verify_cfg_after_peeling (new_loop, loop);
2156 #endif
2158 /* Update number of times loop executes. */
2159 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2160 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2161 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2163 /* Update the init conditions of the access functions of all data refs. */
2164 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2166 /* After peeling we have to reset scalar evolution analyzer. */
2167 scev_reset ();
2169 free_original_copy_tables ();
2173 /* Function vect_create_cond_for_align_checks.
2175 Create a conditional expression that represents the alignment checks for
2176 all data references (array element references) whose alignment must be
2177 checked at runtime.
2179 Input:
2180 COND_EXPR - input conditional expression. New conditions will be chained
2181 with a logical AND operation.
2182 LOOP_VINFO - two fields of the loop information are used.
2183 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2184 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2186 Output:
2187 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2188 expression.
2189 The returned value is the conditional expression to be used in the if
2190 statement that controls which version of the loop gets executed at runtime.
2192 The algorithm makes two assumptions:
2193 1) The number of bytes "n" in a vector is a power of 2.
2194 2) An address "a" is aligned if a%n is zero, and this
2195 test can be done as a&(n-1) == 0. For example, for 16
2196 byte vectors the test is a&0xf == 0. */
2198 static void
2199 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2200 tree *cond_expr,
2201 gimple_seq *cond_expr_stmt_list)
2203 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2204 VEC(gimple,heap) *may_misalign_stmts
2205 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2206 gimple ref_stmt;
2207 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2208 tree mask_cst;
2209 unsigned int i;
2210 tree psize;
2211 tree int_ptrsize_type;
2212 char tmp_name[20];
2213 tree or_tmp_name = NULL_TREE;
2214 tree and_tmp, and_tmp_name;
2215 gimple and_stmt;
2216 tree ptrsize_zero;
2217 tree part_cond_expr;
2219 /* Check that mask is one less than a power of 2, i.e., mask is
2220 all zeros followed by all ones. */
2221 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2223 /* CHECKME: what is the best integer or unsigned type to use to hold a
2224 cast from a pointer value? */
2225 psize = TYPE_SIZE (ptr_type_node);
2226 int_ptrsize_type
2227 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2229 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2230 of the first vector of the i'th data reference. */
2232 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2234 gimple_seq new_stmt_list = NULL;
2235 tree addr_base;
2236 tree addr_tmp, addr_tmp_name;
2237 tree or_tmp, new_or_tmp_name;
2238 gimple addr_stmt, or_stmt;
2240 /* create: addr_tmp = (int)(address_of_first_vector) */
2241 addr_base =
2242 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2243 NULL_TREE, loop);
2244 if (new_stmt_list != NULL)
2245 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2247 sprintf (tmp_name, "%s%d", "addr2int", i);
2248 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2249 add_referenced_var (addr_tmp);
2250 addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2251 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2252 addr_base, NULL_TREE);
2253 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2254 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2256 /* The addresses are ORed together. */
2258 if (or_tmp_name != NULL_TREE)
2260 /* create: or_tmp = or_tmp | addr_tmp */
2261 sprintf (tmp_name, "%s%d", "orptrs", i);
2262 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2263 add_referenced_var (or_tmp);
2264 new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2265 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2266 new_or_tmp_name,
2267 or_tmp_name, addr_tmp_name);
2268 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2269 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2270 or_tmp_name = new_or_tmp_name;
2272 else
2273 or_tmp_name = addr_tmp_name;
2275 } /* end for i */
2277 mask_cst = build_int_cst (int_ptrsize_type, mask);
2279 /* create: and_tmp = or_tmp & mask */
2280 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2281 add_referenced_var (and_tmp);
2282 and_tmp_name = make_ssa_name (and_tmp, NULL);
2284 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2285 or_tmp_name, mask_cst);
2286 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2287 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2289 /* Make and_tmp the left operand of the conditional test against zero.
2290 If and_tmp has a nonzero bit, then some address is unaligned. */
2291 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2292 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2293 and_tmp_name, ptrsize_zero);
2294 if (*cond_expr)
2295 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2296 *cond_expr, part_cond_expr);
2297 else
2298 *cond_expr = part_cond_expr;
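/* Illustrative sketch (hypothetical helper, plain C) of the run-time
   alignment test built above: OR all candidate addresses together and
   mask with vector_size - 1; a zero result means every address is
   aligned and the vectorized version may be executed.  */
static inline int
example_all_aligned_p (const unsigned long *addrs, unsigned int n,
                       unsigned long mask /* vector_size - 1 */)
{
  unsigned long or_tmp = 0;
  unsigned int i;

  for (i = 0; i < n; i++)
    or_tmp |= addrs[i];

  return (or_tmp & mask) == 0;
}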
2302 /* Function vect_vfa_segment_size.
2304 Create an expression that computes the size of the segment
2305 that will be accessed for a data reference. The function takes into
2306 account that realignment loads may access one additional vector.
2308 Input:
2309 DR: The data reference.
2310 VECT_FACTOR: vectorization factor.
2312 Return an expression whose value is the size of the segment that will be
2313 accessed by DR. */
2315 static tree
2316 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2318 tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2319 DR_STEP (dr), vect_factor);
2321 if (vect_supportable_dr_alignment (dr, false)
2322 == dr_explicit_realign_optimized)
2324 tree vector_size = TYPE_SIZE_UNIT
2325 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2327 segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2328 segment_length, vector_size);
2330 return fold_convert (sizetype, segment_length);
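/* Illustrative sketch (hypothetical, plain C): the segment length
   computed above is DR_STEP bytes per scalar iteration times the
   vectorization factor, plus one extra vector when the optimized
   realignment scheme may read one vector past the last element.  */
static inline unsigned long
example_segment_size (unsigned long step, unsigned long vf,
                      unsigned long vector_size, int realign_optimized)
{
  unsigned long length = step * vf;

  if (realign_optimized)
    length += vector_size;
  return length;
}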
2334 /* Function vect_create_cond_for_alias_checks.
2336 Create a conditional expression that represents the run-time checks for
2337 overlapping of address ranges represented by a list of data reference
2338 relations passed as input.
2340 Input:
2341 COND_EXPR - input conditional expression. New conditions will be chained
2342 with a logical AND operation.
2343 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2344 to be checked.
2346 Output:
2347 COND_EXPR - conditional expression.
2348 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2349 expression.
2352 The returned value is the conditional expression to be used in the if
2353 statement that controls which version of the loop gets executed at runtime.
2356 static void
2357 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2358 tree * cond_expr,
2359 gimple_seq * cond_expr_stmt_list)
2361 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2362 VEC (ddr_p, heap) * may_alias_ddrs =
2363 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2364 tree vect_factor =
2365 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2367 ddr_p ddr;
2368 unsigned int i;
2369 tree part_cond_expr;
2371 /* Create expression
2372 (((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2373 || ((load_ptr_0 + load_segment_length_0) < store_ptr_0))
&& ... &&
2377 (((store_ptr_n + store_segment_length_n) < load_ptr_n)
2378 || ((load_ptr_n + load_segment_length_n) < store_ptr_n)) */
2380 if (VEC_empty (ddr_p, may_alias_ddrs))
2381 return;
2383 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2385 struct data_reference *dr_a, *dr_b;
2386 gimple dr_group_first_a, dr_group_first_b;
2387 tree addr_base_a, addr_base_b;
2388 tree segment_length_a, segment_length_b;
2389 gimple stmt_a, stmt_b;
2391 dr_a = DDR_A (ddr);
2392 stmt_a = DR_STMT (DDR_A (ddr));
2393 dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2394 if (dr_group_first_a)
2396 stmt_a = dr_group_first_a;
2397 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2400 dr_b = DDR_B (ddr);
2401 stmt_b = DR_STMT (DDR_B (ddr));
2402 dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2403 if (dr_group_first_b)
2405 stmt_b = dr_group_first_b;
2406 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2409 addr_base_a =
2410 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2411 NULL_TREE, loop);
2412 addr_base_b =
2413 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2414 NULL_TREE, loop);
2416 segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2417 segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2419 if (vect_print_dump_info (REPORT_DR_DETAILS))
2421 fprintf (vect_dump,
2422 "create runtime check for data references ");
2423 print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2424 fprintf (vect_dump, " and ");
2425 print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2429 part_cond_expr =
2430 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2431 fold_build2 (LT_EXPR, boolean_type_node,
2432 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2433 addr_base_a,
2434 segment_length_a),
2435 addr_base_b),
2436 fold_build2 (LT_EXPR, boolean_type_node,
2437 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2438 addr_base_b,
2439 segment_length_b),
2440 addr_base_a));
2442 if (*cond_expr)
2443 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2444 *cond_expr, part_cond_expr);
2445 else
2446 *cond_expr = part_cond_expr;
2449 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2450 fprintf (vect_dump, "created %u versioning for alias checks.\n",
2451 VEC_length (ddr_p, may_alias_ddrs));
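/* Illustrative sketch (hypothetical helper, plain C) of the per-pair
   test generated above: two segments are considered non-overlapping when
   one ends before the other begins.  The vectorized version runs only if
   this holds for every recorded pair, i.e. the per-pair tests are ANDed
   together.  */
static inline int
example_segments_do_not_overlap_p (unsigned long addr_a, unsigned long len_a,
                                   unsigned long addr_b, unsigned long len_b)
{
  return addr_a + len_a < addr_b || addr_b + len_b < addr_a;
}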
2455 /* Function vect_loop_versioning.
2457 If the loop has data references that may or may not be aligned and/or
2458 has data reference relations whose independence was not proven, then
2459 two versions of the loop need to be generated, one which is vectorized
2460 and one which isn't. A test is then generated to control which of the
2461 loops is executed. The test checks for the alignment of all of the
2462 data references that may or may not be aligned. An additional
2463 sequence of runtime tests is generated for each pair of DDRs whose
2464 independence was not proven. The vectorized version of the loop is
2465 executed only if both the alias and the alignment tests pass.
2467 The test generated to check which version of the loop is executed
2468 also checks, up front, that the iteration count makes vectorization
2469 profitable according to the cost model.
2471 The versioning precondition(s) are placed in *COND_EXPR and
2472 *COND_EXPR_STMT_LIST. If DO_VERSIONING is true versioning is
2473 also performed, otherwise only the conditions are generated. */
2475 void
2476 vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2477 tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2479 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2480 basic_block condition_bb;
2481 gimple_stmt_iterator gsi, cond_exp_gsi;
2482 basic_block merge_bb;
2483 basic_block new_exit_bb;
2484 edge new_exit_e, e;
2485 gimple orig_phi, new_phi;
2486 tree arg;
2487 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2488 gimple_seq gimplify_stmt_list = NULL;
2489 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2490 int min_profitable_iters = 0;
2491 unsigned int th;
2493 /* Get profitability threshold for vectorized loop. */
2494 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2496 th = conservative_cost_threshold (loop_vinfo,
2497 min_profitable_iters);
2499 *cond_expr =
2500 fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2501 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2503 *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2504 false, NULL_TREE);
2506 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2507 vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2508 cond_expr_stmt_list);
2510 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2511 vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2512 cond_expr_stmt_list);
2514 *cond_expr =
2515 fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2516 *cond_expr =
2517 force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2518 gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2520 /* If only the versioning conditions were requested (DO_VERSIONING is
2521 false), bail out here. */
2522 if (!do_versioning)
2523 return;
2525 initialize_original_copy_tables ();
2526 loop_version (loop, *cond_expr, &condition_bb,
2527 prob, prob, REG_BR_PROB_BASE - prob, true);
2528 free_original_copy_tables();
2530 /* Loop versioning violates an assumption we try to maintain during
2531 vectorization - that the loop exit block has a single predecessor.
2532 After versioning, the exit block of both loop versions is the same
2533 basic block (i.e. it has two predecessors). Just in order to simplify
2534 following transformations in the vectorizer, we fix this situation
2535 here by adding a new (empty) block on the exit-edge of the loop,
2536 with the proper loop-exit phis to maintain loop-closed-form. */
2538 merge_bb = single_exit (loop)->dest;
2539 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2540 new_exit_bb = split_edge (single_exit (loop));
2541 new_exit_e = single_exit (loop);
2542 e = EDGE_SUCC (new_exit_bb, 0);
2544 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2546 orig_phi = gsi_stmt (gsi);
2547 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2548 new_exit_bb);
2549 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2550 add_phi_arg (new_phi, arg, new_exit_e,
2551 gimple_phi_arg_location_from_edge (orig_phi, e));
2552 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2555 /* End loop-exit-fixes after versioning. */
2557 update_ssa (TODO_update_ssa);
2558 if (*cond_expr_stmt_list)
2560 cond_exp_gsi = gsi_last_bb (condition_bb);
2561 gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2562 GSI_SAME_STMT);
2563 *cond_expr_stmt_list = NULL;
2565 *cond_expr = NULL_TREE;
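/* Illustrative sketch (hypothetical, plain C): the overall versioning
   condition assembled above has the shape
     niters > threshold && all_aligned && no_aliasing
   and selects the vectorized copy of the loop when it holds, the scalar
   copy otherwise.  */
static inline int
example_take_vectorized_version_p (unsigned long niters, unsigned long th,
                                   int all_aligned, int no_aliasing)
{
  return niters > th && all_aligned && no_aliasing;
}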