2013-02-12 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-vect-loop-manip.c
blob8a8982ad2e0dfba33ded0374c7e1d26400aa25a6
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-pass.h"
33 #include "cfgloop.h"
34 #include "diagnostic-core.h"
35 #include "tree-scalar-evolution.h"
36 #include "tree-vectorizer.h"
37 #include "langhooks.h"
39 /*************************************************************************
40 Simple Loop Peeling Utilities
42 Utilities to support loop peeling for vectorization purposes.
43 *************************************************************************/
46 /* Renames the use *OP_P. */
48 static void
49 rename_use_op (use_operand_p op_p)
51 tree new_name;
53 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
54 return;
56 new_name = get_current_def (USE_FROM_PTR (op_p));
58 /* Something defined outside of the loop. */
59 if (!new_name)
60 return;
62 /* An ordinary ssa name defined in the loop. */
64 SET_USE (op_p, new_name);
68 /* Renames the variables in basic block BB. */
70 static void
71 rename_variables_in_bb (basic_block bb)
73 gimple_stmt_iterator gsi;
74 gimple stmt;
75 use_operand_p use_p;
76 ssa_op_iter iter;
77 edge e;
78 edge_iterator ei;
79 struct loop *loop = bb->loop_father;
81 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
83 stmt = gsi_stmt (gsi);
84 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
85 rename_use_op (use_p);
88 FOR_EACH_EDGE (e, ei, bb->preds)
90 if (!flow_bb_inside_loop_p (loop, e->src))
91 continue;
92 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
93 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
98 typedef struct
100 tree from, to;
101 basic_block bb;
102 } adjust_info;
104 /* A stack of values to be adjusted in debug stmts. We have to
105 process them LIFO, so that the closest substitution applies. If we
106 processed them FIFO, without the stack, we might substitute uses
107 with a PHI DEF that would soon become non-dominant, and when we got
108 to the suitable one, it wouldn't have anything to substitute any
109 more. */
110 static vec<adjust_info, va_stack> adjust_vec;
112 /* Adjust any debug stmts that referenced AI->from values to use the
113 loop-closed AI->to, if the references are dominated by AI->bb and
114 not by the definition of AI->from. */
116 static void
117 adjust_debug_stmts_now (adjust_info *ai)
119 basic_block bbphi = ai->bb;
120 tree orig_def = ai->from;
121 tree new_def = ai->to;
122 imm_use_iterator imm_iter;
123 gimple stmt;
124 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
126 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
128 /* Adjust any debug stmts that held onto non-loop-closed
129 references. */
130 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
132 use_operand_p use_p;
133 basic_block bbuse;
135 if (!is_gimple_debug (stmt))
136 continue;
138 gcc_assert (gimple_debug_bind_p (stmt));
140 bbuse = gimple_bb (stmt);
142 if ((bbuse == bbphi
143 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
144 && !(bbuse == bbdef
145 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
147 if (new_def)
148 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
149 SET_USE (use_p, new_def);
150 else
152 gimple_debug_bind_reset_value (stmt);
153 update_stmt (stmt);
159 /* Adjust debug stmts as scheduled before. */
161 static void
162 adjust_vec_debug_stmts (void)
164 if (!MAY_HAVE_DEBUG_STMTS)
165 return;
167 gcc_assert (adjust_vec.exists ());
169 while (!adjust_vec.is_empty ())
171 adjust_debug_stmts_now (&adjust_vec.last ());
172 adjust_vec.pop ();
175 adjust_vec.release ();
178 /* Adjust any debug stmts that referenced FROM values to use the
179 loop-closed TO, if the references are dominated by BB and not by
180 the definition of FROM. If adjust_vec is non-NULL, adjustments
181 will be postponed until adjust_vec_debug_stmts is called. */
183 static void
184 adjust_debug_stmts (tree from, tree to, basic_block bb)
186 adjust_info ai;
188 if (MAY_HAVE_DEBUG_STMTS
189 && TREE_CODE (from) == SSA_NAME
190 && ! virtual_operand_p (from))
192 ai.from = from;
193 ai.to = to;
194 ai.bb = bb;
196 if (adjust_vec.exists ())
197 adjust_vec.safe_push (ai);
198 else
199 adjust_debug_stmts_now (&ai);
203 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
204 to adjust any debug stmts that referenced the old phi arg,
205 presumably non-loop-closed references left over from other
206 transformations. */
208 static void
209 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
211 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
213 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
215 if (MAY_HAVE_DEBUG_STMTS)
216 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
217 gimple_bb (update_phi));
221 /* Update PHI nodes for a guard of the LOOP.
223 Input:
224 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
225 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
226 originates from the guard-bb, skips LOOP and reaches the (unique) exit
227 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
228 We denote this bb NEW_MERGE_BB because before the guard code was added
229 it had a single predecessor (the LOOP header), and now it became a merge
230 point of two paths - the path that ends with the LOOP exit-edge, and
231 the path that ends with GUARD_EDGE.
232 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
233 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
235 ===> The CFG before the guard-code was added:
236 LOOP_header_bb:
237 loop_body
238 if (exit_loop) goto update_bb
239 else goto LOOP_header_bb
240 update_bb:
242 ==> The CFG after the guard-code was added:
243 guard_bb:
244 if (LOOP_guard_condition) goto new_merge_bb
245 else goto LOOP_header_bb
246 LOOP_header_bb:
247 loop_body
248 if (exit_loop_condition) goto new_merge_bb
249 else goto LOOP_header_bb
250 new_merge_bb:
251 goto update_bb
252 update_bb:
254 ==> The CFG after this function:
255 guard_bb:
256 if (LOOP_guard_condition) goto new_merge_bb
257 else goto LOOP_header_bb
258 LOOP_header_bb:
259 loop_body
260 if (exit_loop_condition) goto new_exit_bb
261 else goto LOOP_header_bb
262 new_exit_bb:
263 new_merge_bb:
264 goto update_bb
265 update_bb:
267 This function:
268 1. creates and updates the relevant phi nodes to account for the new
269 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
270 1.1. Create phi nodes at NEW_MERGE_BB.
271 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
272 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
273 2. preserves loop-closed-ssa-form by creating the required phi nodes
274 at the exit of LOOP (i.e, in NEW_EXIT_BB).
276 There are two flavors to this function:
278 slpeel_update_phi_nodes_for_guard1:
279 Here the guard controls whether we enter or skip LOOP, where LOOP is a
280 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
281 for variables that have phis in the loop header.
283 slpeel_update_phi_nodes_for_guard2:
284 Here the guard controls whether we enter or skip LOOP, where LOOP is an
285 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
286 for variables that have phis in the loop exit.
288 I.E., the overall structure is:
290 loop1_preheader_bb:
291 guard1 (goto loop1/merge1_bb)
292 loop1
293 loop1_exit_bb:
294 guard2 (goto merge1_bb/merge2_bb)
295 merge1_bb
296 loop2
297 loop2_exit_bb
298 merge2_bb
299 next_bb
301 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
302 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
303 that have phis in loop1->header).
305 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
306 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
307 that have phis in next_bb). It also adds some of these phis to
308 loop1_exit_bb.
310 slpeel_update_phi_nodes_for_guard1 is always called before
311 slpeel_update_phi_nodes_for_guard2. They are both needed in order
312 to create correct data-flow and loop-closed-ssa-form.
314 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
315 that change between iterations of a loop (and therefore have a phi-node
316 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
317 phis for variables that are used out of the loop (and therefore have
318 loop-closed exit phis). Some variables may be both updated between
319 iterations and used after the loop. This is why in loop1_exit_bb we
320 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
321 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
323 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
324 an original loop. i.e., we have:
326 orig_loop
327 guard_bb (goto LOOP/new_merge)
328 new_loop <-- LOOP
329 new_exit
330 new_merge
331 next_bb
333 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
334 have:
336 new_loop
337 guard_bb (goto LOOP/new_merge)
338 orig_loop <-- LOOP
339 new_exit
340 new_merge
341 next_bb
343 The SSA names defined in the original loop have a current
344 reaching definition that records the corresponding new
345 ssa-name used in the new duplicated loop copy.
348 /* Function slpeel_update_phi_nodes_for_guard1
350 Input:
351 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
352 - DEFS - a bitmap of ssa names to mark new names for which we recorded
353 information.
355 In the context of the overall structure, we have:
357 loop1_preheader_bb:
358 guard1 (goto loop1/merge1_bb)
359 LOOP-> loop1
360 loop1_exit_bb:
361 guard2 (goto merge1_bb/merge2_bb)
362 merge1_bb
363 loop2
364 loop2_exit_bb
365 merge2_bb
366 next_bb
368 For each name updated between loop iterations (i.e - for each name that has
369 an entry (loop-header) phi in LOOP) we create a new phi in:
370 1. merge1_bb (to account for the edge from guard1)
371 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
374 static void
375 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
376 bool is_new_loop, basic_block *new_exit_bb)
378 gimple orig_phi, new_phi;
379 gimple update_phi, update_phi2;
380 tree guard_arg, loop_arg;
381 basic_block new_merge_bb = guard_edge->dest;
382 edge e = EDGE_SUCC (new_merge_bb, 0);
383 basic_block update_bb = e->dest;
384 basic_block orig_bb = loop->header;
385 edge new_exit_e;
386 tree current_new_name;
387 gimple_stmt_iterator gsi_orig, gsi_update;
389 /* Create new bb between loop and new_merge_bb. */
390 *new_exit_bb = split_edge (single_exit (loop));
392 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
394 for (gsi_orig = gsi_start_phis (orig_bb),
395 gsi_update = gsi_start_phis (update_bb);
396 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
397 gsi_next (&gsi_orig), gsi_next (&gsi_update))
399 source_location loop_locus, guard_locus;
400 tree new_res;
401 orig_phi = gsi_stmt (gsi_orig);
402 update_phi = gsi_stmt (gsi_update);
404 /** 1. Handle new-merge-point phis **/
406 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
407 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
408 new_phi = create_phi_node (new_res, new_merge_bb);
410 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
411 of LOOP. Set the two phi args in NEW_PHI for these edges: */
412 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
413 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
414 EDGE_SUCC (loop->latch,
415 0));
416 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
417 guard_locus
418 = gimple_phi_arg_location_from_edge (orig_phi,
419 loop_preheader_edge (loop));
421 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
422 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
424 /* 1.3. Update phi in successor block. */
425 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
426 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
427 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
428 update_phi2 = new_phi;
431 /** 2. Handle loop-closed-ssa-form phis **/
433 if (virtual_operand_p (PHI_RESULT (orig_phi)))
434 continue;
436 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
437 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
438 new_phi = create_phi_node (new_res, *new_exit_bb);
440 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
441 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
443 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
444 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
445 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
446 PHI_RESULT (new_phi));
448 /* 2.4. Record the newly created name with set_current_def.
449 We want to find a name such that
450 name = get_current_def (orig_loop_name)
451 and to set its current definition as follows:
452 set_current_def (name, new_phi_name)
454 If LOOP is a new loop then loop_arg is already the name we're
455 looking for. If LOOP is the original loop, then loop_arg is
456 the orig_loop_name and the relevant name is recorded in its
457 current reaching definition. */
458 if (is_new_loop)
459 current_new_name = loop_arg;
460 else
462 current_new_name = get_current_def (loop_arg);
463 /* current_def is not available only if the variable does not
464 change inside the loop, in which case we also don't care
465 about recording a current_def for it because we won't be
466 trying to create loop-exit-phis for it. */
467 if (!current_new_name)
468 continue;
470 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
472 set_current_def (current_new_name, PHI_RESULT (new_phi));
477 /* Function slpeel_update_phi_nodes_for_guard2
479 Input:
480 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
482 In the context of the overall structure, we have:
484 loop1_preheader_bb:
485 guard1 (goto loop1/merge1_bb)
486 loop1
487 loop1_exit_bb:
488 guard2 (goto merge1_bb/merge2_bb)
489 merge1_bb
490 LOOP-> loop2
491 loop2_exit_bb
492 merge2_bb
493 next_bb
495 For each name used outside the loop (i.e - for each name that has an exit
496 phi in next_bb) we create a new phi in:
497 1. merge2_bb (to account for the edge from guard_bb)
498 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
499 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
500 if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
503 static void
504 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
505 bool is_new_loop, basic_block *new_exit_bb)
507 gimple orig_phi, new_phi;
508 gimple update_phi, update_phi2;
509 tree guard_arg, loop_arg;
510 basic_block new_merge_bb = guard_edge->dest;
511 edge e = EDGE_SUCC (new_merge_bb, 0);
512 basic_block update_bb = e->dest;
513 edge new_exit_e;
514 tree orig_def, orig_def_new_name;
515 tree new_name, new_name2;
516 tree arg;
517 gimple_stmt_iterator gsi;
519 /* Create new bb between loop and new_merge_bb. */
520 *new_exit_bb = split_edge (single_exit (loop));
522 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
524 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
526 tree new_res;
527 update_phi = gsi_stmt (gsi);
528 orig_phi = update_phi;
529 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
530 /* This loop-closed-phi actually doesn't represent a use
531 out of the loop - the phi arg is a constant. */
532 if (TREE_CODE (orig_def) != SSA_NAME)
533 continue;
534 orig_def_new_name = get_current_def (orig_def);
535 arg = NULL_TREE;
537 /** 1. Handle new-merge-point phis **/
539 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
540 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
541 new_phi = create_phi_node (new_res, new_merge_bb);
543 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
544 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
545 new_name = orig_def;
546 new_name2 = NULL_TREE;
547 if (orig_def_new_name)
549 new_name = orig_def_new_name;
550 /* Some variables have both loop-entry-phis and loop-exit-phis.
551 Such variables were given yet newer names by phis placed in
552 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
553 new_name2 = get_current_def (get_current_def (orig_name)). */
554 new_name2 = get_current_def (new_name);
557 if (is_new_loop)
559 guard_arg = orig_def;
560 loop_arg = new_name;
562 else
564 guard_arg = new_name;
565 loop_arg = orig_def;
567 if (new_name2)
568 guard_arg = new_name2;
570 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
571 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
573 /* 1.3. Update phi in successor block. */
574 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
575 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
576 update_phi2 = new_phi;
579 /** 2. Handle loop-closed-ssa-form phis **/
581 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
582 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
583 new_phi = create_phi_node (new_res, *new_exit_bb);
585 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
586 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
588 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
589 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
590 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
591 PHI_RESULT (new_phi));
594 /** 3. Handle loop-closed-ssa-form phis for first loop **/
596 /* 3.1. Find the relevant names that need an exit-phi in
597 GUARD_BB, i.e. names for which
598 slpeel_update_phi_nodes_for_guard1 had not already created a
599 phi node. This is the case for names that are used outside
600 the loop (and therefore need an exit phi) but are not updated
601 across loop iterations (and therefore don't have a
602 loop-header-phi).
604 slpeel_update_phi_nodes_for_guard1 is responsible for
605 creating loop-exit phis in GUARD_BB for names that have a
606 loop-header-phi. When such a phi is created we also record
607 the new name in its current definition. If this new name
608 exists, then guard_arg was set to this new name (see 1.2
609 above). Therefore, if guard_arg is not this new name, this
610 is an indication that an exit-phi in GUARD_BB was not yet
611 created, so we take care of it here. */
612 if (guard_arg == new_name2)
613 continue;
614 arg = guard_arg;
616 /* 3.2. Generate new phi node in GUARD_BB: */
617 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
618 new_phi = create_phi_node (new_res, guard_edge->src);
620 /* 3.3. GUARD_BB has one incoming edge: */
621 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
622 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
623 UNKNOWN_LOCATION);
625 /* 3.4. Update phi in successor of GUARD_BB: */
626 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
627 == guard_arg);
628 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
629 PHI_RESULT (new_phi));
634 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
635 that starts at zero, increases by one and its limit is NITERS.
637 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
639 void
640 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
642 tree indx_before_incr, indx_after_incr;
643 gimple cond_stmt;
644 gimple orig_cond;
645 edge exit_edge = single_exit (loop);
646 gimple_stmt_iterator loop_cond_gsi;
647 gimple_stmt_iterator incr_gsi;
648 bool insert_after;
649 tree init = build_int_cst (TREE_TYPE (niters), 0);
650 tree step = build_int_cst (TREE_TYPE (niters), 1);
651 LOC loop_loc;
652 enum tree_code code;
654 orig_cond = get_loop_exit_condition (loop);
655 gcc_assert (orig_cond);
656 loop_cond_gsi = gsi_for_stmt (orig_cond);
658 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
659 create_iv (init, step, NULL_TREE, loop,
660 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
662 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
663 true, NULL_TREE, true,
664 GSI_SAME_STMT);
665 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
666 true, GSI_SAME_STMT);
668 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
669 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
670 NULL_TREE);
672 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
674 /* Remove old loop exit test: */
675 gsi_remove (&loop_cond_gsi, true);
676 free_stmt_vec_info (orig_cond);
678 loop_loc = find_loop_location (loop);
679 if (dump_enabled_p ())
681 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOC)
682 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOC_FILE (loop_loc),
683 LOC_LINE (loop_loc));
684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
686 loop->nb_iterations = niters;
690 /* Given LOOP this function generates a new copy of it and puts it
691 on E which is either the entry or exit of LOOP. */
693 struct loop *
694 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
696 struct loop *new_loop;
697 basic_block *new_bbs, *bbs;
698 bool at_exit;
699 bool was_imm_dom;
700 basic_block exit_dest;
701 edge exit, new_exit;
703 exit = single_exit (loop);
704 at_exit = (e == exit);
705 if (!at_exit && e != loop_preheader_edge (loop))
706 return NULL;
708 bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
709 get_loop_body_with_size (loop, bbs, loop->num_nodes);
711 /* Check whether duplication is possible. */
712 if (!can_copy_bbs_p (bbs, loop->num_nodes))
714 free (bbs);
715 return NULL;
718 /* Generate new loop structure. */
719 new_loop = duplicate_loop (loop, loop_outer (loop));
720 duplicate_subloops (loop, new_loop);
722 exit_dest = exit->dest;
723 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
724 exit_dest) == loop->header ?
725 true : false);
727 /* Also copy the pre-header, this avoids jumping through hoops to
728 duplicate the loop entry PHI arguments. Create an empty
729 pre-header unconditionally for this. */
730 basic_block preheader = split_edge (loop_preheader_edge (loop));
731 edge entry_e = single_pred_edge (preheader);
732 bbs[loop->num_nodes] = preheader;
733 new_bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
735 copy_bbs (bbs, loop->num_nodes + 1, new_bbs,
736 &exit, 1, &new_exit, NULL,
737 e->src);
738 basic_block new_preheader = new_bbs[loop->num_nodes];
740 add_phi_args_after_copy (new_bbs, loop->num_nodes + 1, NULL);
742 if (at_exit) /* Add the loop copy at exit. */
744 redirect_edge_and_branch_force (e, new_preheader);
745 flush_pending_stmts (e);
746 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
747 if (was_imm_dom)
748 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
750 /* And remove the non-necessary forwarder again. Keep the other
751 one so we have a proper pre-header for the loop at the exit edge. */
752 redirect_edge_pred (single_succ_edge (preheader), single_pred (preheader));
753 delete_basic_block (preheader);
754 set_immediate_dominator (CDI_DOMINATORS, loop->header,
755 loop_preheader_edge (loop)->src);
757 else /* Add the copy at entry. */
759 redirect_edge_and_branch_force (entry_e, new_preheader);
760 flush_pending_stmts (entry_e);
761 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
763 redirect_edge_and_branch_force (new_exit, preheader);
764 flush_pending_stmts (new_exit);
765 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
767 /* And remove the non-necessary forwarder again. Keep the other
768 one so we have a proper pre-header for the loop at the exit edge. */
769 redirect_edge_pred (single_succ_edge (new_preheader), single_pred (new_preheader));
770 delete_basic_block (new_preheader);
771 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
772 loop_preheader_edge (new_loop)->src);
775 for (unsigned i = 0; i < loop->num_nodes+1; i++)
776 rename_variables_in_bb (new_bbs[i]);
778 free (new_bbs);
779 free (bbs);
781 #ifdef ENABLE_CHECKING
782 verify_dominators (CDI_DOMINATORS);
783 #endif
785 return new_loop;
789 /* Given the condition statement COND, put it as the last statement
790 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
791 Assumes that this is the single exit of the guarded loop.
792 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
794 static edge
795 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
796 gimple_seq cond_expr_stmt_list,
797 basic_block exit_bb, basic_block dom_bb,
798 int probability)
800 gimple_stmt_iterator gsi;
801 edge new_e, enter_e;
802 gimple cond_stmt;
803 gimple_seq gimplify_stmt_list = NULL;
805 enter_e = EDGE_SUCC (guard_bb, 0);
806 enter_e->flags &= ~EDGE_FALLTHRU;
807 enter_e->flags |= EDGE_FALSE_VALUE;
808 gsi = gsi_last_bb (guard_bb);
810 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
811 NULL_TREE);
812 if (gimplify_stmt_list)
813 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
814 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
815 if (cond_expr_stmt_list)
816 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
818 gsi = gsi_last_bb (guard_bb);
819 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
821 /* Add new edge to connect guard block to the merge/loop-exit block. */
822 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
824 new_e->count = guard_bb->count;
825 new_e->probability = probability;
826 new_e->count = apply_probability (enter_e->count, probability);
827 enter_e->count -= new_e->count;
828 enter_e->probability = inverse_probability (probability);
829 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
830 return new_e;
834 /* This function verifies that the following restrictions apply to LOOP:
835 (1) it is innermost
836 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
837 (3) it is single entry, single exit
838 (4) its exit condition is the last stmt in the header
839 (5) E is the entry/exit edge of LOOP.
842 bool
843 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
845 edge exit_e = single_exit (loop);
846 edge entry_e = loop_preheader_edge (loop);
847 gimple orig_cond = get_loop_exit_condition (loop);
848 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
850 if (need_ssa_update_p (cfun))
851 return false;
853 if (loop->inner
854 /* All loops have an outer scope; the only case loop->outer is NULL is for
855 the function itself. */
856 || !loop_outer (loop)
857 || loop->num_nodes != 2
858 || !empty_block_p (loop->latch)
859 || !single_exit (loop)
860 /* Verify that new loop exit condition can be trivially modified. */
861 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
862 || (e != exit_e && e != entry_e))
863 return false;
865 return true;
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by peeling: FIRST_LOOP's exit block
   must guard SECOND_LOOP, and SECOND_LOOP's preheader must be reachable
   both from that guard and from the block before FIRST_LOOP.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
				 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
	of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
		   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
		  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
		      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
902 /* If the run time cost model check determines that vectorization is
903 not profitable and hence scalar loop should be generated then set
904 FIRST_NITERS to prologue peeled iterations. This will allow all the
905 iterations to be executed in the prologue peeled scalar loop. */
907 static void
908 set_prologue_iterations (basic_block bb_before_first_loop,
909 tree *first_niters,
910 struct loop *loop,
911 unsigned int th,
912 int probability)
914 edge e;
915 basic_block cond_bb, then_bb;
916 tree var, prologue_after_cost_adjust_name;
917 gimple_stmt_iterator gsi;
918 gimple newphi;
919 edge e_true, e_false, e_fallthru;
920 gimple cond_stmt;
921 gimple_seq stmts = NULL;
922 tree cost_pre_condition = NULL_TREE;
923 tree scalar_loop_iters =
924 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
926 e = single_pred_edge (bb_before_first_loop);
927 cond_bb = split_edge(e);
929 e = single_pred_edge (bb_before_first_loop);
930 then_bb = split_edge(e);
931 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
933 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
934 EDGE_FALSE_VALUE);
935 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
937 e_true = EDGE_PRED (then_bb, 0);
938 e_true->flags &= ~EDGE_FALLTHRU;
939 e_true->flags |= EDGE_TRUE_VALUE;
941 e_true->probability = probability;
942 e_false->probability = inverse_probability (probability);
943 e_true->count = apply_probability (cond_bb->count, probability);
944 e_false->count = cond_bb->count - e_true->count;
945 then_bb->frequency = EDGE_FREQUENCY (e_true);
946 then_bb->count = e_true->count;
948 e_fallthru = EDGE_SUCC (then_bb, 0);
949 e_fallthru->count = then_bb->count;
951 gsi = gsi_last_bb (cond_bb);
952 cost_pre_condition =
953 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
954 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
955 cost_pre_condition =
956 force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
957 NULL_TREE, false, GSI_CONTINUE_LINKING);
958 cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
959 NULL_TREE, NULL_TREE);
960 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
962 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
963 "prologue_after_cost_adjust");
964 prologue_after_cost_adjust_name =
965 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
967 gsi = gsi_last_bb (then_bb);
968 if (stmts)
969 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
971 newphi = create_phi_node (var, bb_before_first_loop);
972 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
973 UNKNOWN_LOCATION);
974 add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
976 *first_niters = PHI_RESULT (newphi);
979 /* Function slpeel_tree_peel_loop_to_edge.
981 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
982 that is placed on the entry (exit) edge E of LOOP. After this transformation
983 we have two loops one after the other - first-loop iterates FIRST_NITERS
984 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
985 If the cost model indicates that it is profitable to emit a scalar
986 loop instead of the vector one, then the prolog (epilog) loop will iterate
987 for the entire unchanged scalar iterations of the loop.
989 Input:
990 - LOOP: the loop to be peeled.
991 - E: the exit or entry edge of LOOP.
992 If it is the entry edge, we peel the first iterations of LOOP. In this
993 case first-loop is LOOP, and second-loop is the newly created loop.
994 If it is the exit edge, we peel the last iterations of LOOP. In this
995 case, first-loop is the newly created loop, and second-loop is LOOP.
996 - NITERS: the number of iterations that LOOP iterates.
997 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
998 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
999 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1000 is false, the caller of this function may want to take care of this
1001 (this can be useful if we don't want new stmts added to first-loop).
1002 - TH: cost model profitability threshold of iterations for vectorization.
1003 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1004 during versioning and hence needs to occur during
1005 prologue generation or whether cost model check
1006 has not occurred during prologue generation and hence
1007 needs to occur during epilogue generation.
1008 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1009 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1012 Output:
1013 The function returns a pointer to the new loop-copy, or NULL if it failed
1014 to perform the transformation.
1016 The function generates two if-then-else guards: one before the first loop,
1017 and the other before the second loop:
1018 The first guard is:
1019 if (FIRST_NITERS == 0) then skip the first loop,
1020 and go directly to the second loop.
1021 The second guard is:
1022 if (FIRST_NITERS == NITERS) then skip the second loop.
1024 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1025 then the generated condition is combined with COND_EXPR and the
1026 statements in COND_EXPR_STMT_LIST are emitted together with it.
1028 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1029 FORNOW the resulting code will not be in loop-closed-ssa form.
1032 static struct loop*
1033 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1034 edge e, tree *first_niters,
1035 tree niters, bool update_first_loop_count,
1036 unsigned int th, bool check_profitability,
1037 tree cond_expr, gimple_seq cond_expr_stmt_list,
1038 int bound1, int bound2)
1040 struct loop *new_loop = NULL, *first_loop, *second_loop;
1041 edge skip_e;
1042 tree pre_condition = NULL_TREE;
1043 basic_block bb_before_second_loop, bb_after_second_loop;
1044 basic_block bb_before_first_loop;
1045 basic_block bb_between_loops;
1046 basic_block new_exit_bb;
1047 gimple_stmt_iterator gsi;
1048 edge exit_e = single_exit (loop);
1049 LOC loop_loc;
1050 tree cost_pre_condition = NULL_TREE;
1051 /* There are many aspects to how likely the first loop is going to be executed.
1052 Without histogram we can't really do good job. Simply set it to
1053 2/3, so the first loop is not reordered to the end of function and
1054 the hot path through stays short. */
1055 int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1056 int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1057 int probability_of_second_loop;
/* Peeling is only supported for "simple" loops; bail out early otherwise
   (see slpeel_can_duplicate_loop_p and the FORNOW note above).  */
1059 if (!slpeel_can_duplicate_loop_p (loop, e))
1060 return NULL;
1062 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1063 in the exit bb and rename all the uses after the loop. This simplifies
1064 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1065 (but normally loop closed SSA form doesn't require virtual PHIs to be
1066 in the same form). Doing this early simplifies the checking what
1067 uses should be renamed. */
1068 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1069 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1071 gimple phi = gsi_stmt (gsi);
1072 for (gsi = gsi_start_phis (exit_e->dest);
1073 !gsi_end_p (gsi); gsi_next (&gsi))
1074 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1075 break;
1076 if (gsi_end_p (gsi))
1078 tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1079 gimple new_phi = create_phi_node (new_vop, exit_e->dest);
1080 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1081 imm_use_iterator imm_iter;
1082 gimple stmt;
1083 use_operand_p use_p;
1085 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1086 gimple_phi_set_result (new_phi, new_vop);
/* Redirect all uses of VOP that are outside the loop (and not the new
   exit PHI itself) to the new loop-closed SSA name.  */
1087 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1088 if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1089 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1090 SET_USE (use_p, new_vop);
1092 break;
1095 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1096 Resulting CFG would be:
1098 first_loop:
1099 do {
1100 } while ...
1102 second_loop:
1103 do {
1104 } while ...
1106 orig_exit_bb:
1109 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1111 loop_loc = find_loop_location (loop);
1112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1113 "tree_duplicate_loop_to_edge_cfg failed.\n");
1114 return NULL;
/* Debug-stmt adjustment records are collected into ADJUST_VEC while
   peeling and applied by adjust_vec_debug_stmts at the end.  */
1117 if (MAY_HAVE_DEBUG_STMTS)
1119 gcc_assert (!adjust_vec.exists ());
1120 vec_stack_alloc (adjust_info, adjust_vec, 32);
1123 if (e == exit_e)
1125 /* NEW_LOOP was placed after LOOP. */
1126 first_loop = loop;
1127 second_loop = new_loop;
1129 else
1131 /* NEW_LOOP was placed before LOOP. */
1132 first_loop = new_loop;
1133 second_loop = loop;
1136 /* 2. Add the guard code in one of the following ways:
1138 2.a Add the guard that controls whether the first loop is executed.
1139 This occurs when this function is invoked for prologue or epilogue
1140 generation and when the cost model check can be done at compile time.
1142 Resulting CFG would be:
1144 bb_before_first_loop:
1145 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1146 GOTO first-loop
1148 first_loop:
1149 do {
1150 } while ...
1152 bb_before_second_loop:
1154 second_loop:
1155 do {
1156 } while ...
1158 orig_exit_bb:
1160 2.b Add the cost model check that allows the prologue
1161 to iterate for the entire unchanged scalar
1162 iterations of the loop in the event that the cost
1163 model indicates that the scalar loop is more
1164 profitable than the vector one. This occurs when
1165 this function is invoked for prologue generation
1166 and the cost model check needs to be done at run
1167 time.
1169 Resulting CFG after prologue peeling would be:
1171 if (scalar_loop_iterations <= th)
1172 FIRST_NITERS = scalar_loop_iterations
1174 bb_before_first_loop:
1175 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1176 GOTO first-loop
1178 first_loop:
1179 do {
1180 } while ...
1182 bb_before_second_loop:
1184 second_loop:
1185 do {
1186 } while ...
1188 orig_exit_bb:
1190 2.c Add the cost model check that allows the epilogue
1191 to iterate for the entire unchanged scalar
1192 iterations of the loop in the event that the cost
1193 model indicates that the scalar loop is more
1194 profitable than the vector one. This occurs when
1195 this function is invoked for epilogue generation
1196 and the cost model check needs to be done at run
1197 time. This check is combined with any pre-existing
1198 check in COND_EXPR to avoid versioning.
1200 Resulting CFG after prologue peeling would be:
1202 bb_before_first_loop:
1203 if ((scalar_loop_iterations <= th)
1205 FIRST_NITERS == 0) GOTO bb_before_second_loop
1206 GOTO first-loop
1208 first_loop:
1209 do {
1210 } while ...
1212 bb_before_second_loop:
1214 second_loop:
1215 do {
1216 } while ...
1218 orig_exit_bb:
1221 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1222 /* Loop copying inserted a forwarder block for us here. */
1223 bb_before_second_loop = single_exit (first_loop)->dest;
1225 probability_of_second_loop = (inverse_probability (first_guard_probability)
1226 + combine_probabilities (second_guard_probability,
1227 first_guard_probability));
1228 /* Theoretically preheader edge of first loop and exit edge should have
1229 same frequencies. Loop exit probabilities are however easy to get wrong.
1230 It is safer to copy value from original loop entry. */
1231 bb_before_second_loop->frequency
1232 = apply_probability (bb_before_first_loop->frequency,
1233 probability_of_second_loop);
1234 bb_before_second_loop->count
1235 = apply_probability (bb_before_first_loop->count,
1236 probability_of_second_loop);
1237 single_succ_edge (bb_before_second_loop)->count
1238 = bb_before_second_loop->count;
1240 /* Epilogue peeling. */
1241 if (!update_first_loop_count)
1243 pre_condition =
1244 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1245 build_int_cst (TREE_TYPE (*first_niters), 0));
1246 if (check_profitability)
1248 tree scalar_loop_iters
1249 = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1250 (loop_vec_info_for_loop (loop)));
1251 cost_pre_condition =
1252 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1253 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1255 pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1256 cost_pre_condition, pre_condition);
1258 if (cond_expr)
1260 pre_condition =
1261 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1262 pre_condition,
1263 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1264 cond_expr));
1268 /* Prologue peeling. */
1269 else
1271 if (check_profitability)
1272 set_prologue_iterations (bb_before_first_loop, first_niters,
1273 loop, th, first_guard_probability);
1275 pre_condition =
1276 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1277 build_int_cst (TREE_TYPE (*first_niters), 0));
1280 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1281 cond_expr_stmt_list,
1282 bb_before_second_loop, bb_before_first_loop,
1283 inverse_probability (first_guard_probability));
1284 scale_loop_profile (first_loop, first_guard_probability,
1285 check_profitability && (int)th > bound1 ? th : bound1);
1286 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1287 first_loop == new_loop,
1288 &new_exit_bb);
1291 /* 3. Add the guard that controls whether the second loop is executed.
1292 Resulting CFG would be:
1294 bb_before_first_loop:
1295 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1296 GOTO first-loop
1298 first_loop:
1299 do {
1300 } while ...
1302 bb_between_loops:
1303 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1304 GOTO bb_before_second_loop
1306 bb_before_second_loop:
1308 second_loop:
1309 do {
1310 } while ...
1312 bb_after_second_loop:
1314 orig_exit_bb:
1317 bb_between_loops = new_exit_bb;
1318 bb_after_second_loop = split_edge (single_exit (second_loop));
1320 pre_condition =
1321 fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1322 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1323 bb_after_second_loop, bb_before_first_loop,
1324 inverse_probability (second_guard_probability));
1325 scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1326 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1327 second_loop == new_loop, &new_exit_bb);
1329 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1331 if (update_first_loop_count)
1332 slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1334 delete_update_ssa ();
/* Apply the debug-stmt adjustments collected above (no-op when no
   debug stmts were recorded).  */
1336 adjust_vec_debug_stmts ();
1338 return new_loop;
1341 /* Function find_loop_location.
1343 Extract the location of the loop in the source code.
1344 If the loop is not well formed for vectorization, an estimated
1345 location is calculated.
1346 Return the loop location if it succeeds and UNKNOWN_LOC if not. */
1349 find_loop_location (struct loop *loop)
1351 gimple stmt = NULL;
1352 basic_block bb;
1353 gimple_stmt_iterator si;
1355 if (!loop)
1356 return UNKNOWN_LOC;
1358 stmt = get_loop_exit_condition (loop);
1360 if (stmt
1361 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1362 return gimple_location (stmt);
1364 /* If we got here the loop is probably not "well formed",
1365 try to estimate the loop location */
1367 if (!loop->header)
1368 return UNKNOWN_LOC;
1370 bb = loop->header;
1372 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1374 stmt = gsi_stmt (si);
1375 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1376 return gimple_location (stmt);
1379 return UNKNOWN_LOC;
1383 /* This function builds ni_name = number of iterations loop executes
1384 on the loop preheader. If SEQ is given the stmt is instead emitted
1385 there. */
1387 static tree
1388 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1390 tree ni_name, var;
1391 gimple_seq stmts = NULL;
1392 edge pe;
1393 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1394 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1396 var = create_tmp_var (TREE_TYPE (ni), "niters");
1397 ni_name = force_gimple_operand (ni, &stmts, false, var);
1399 pe = loop_preheader_edge (loop);
1400 if (stmts)
1402 if (seq)
1403 gimple_seq_add_seq (&seq, stmts);
1404 else
1406 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1407 gcc_assert (!new_bb);
1411 return ni_name;
1415 /* This function generates the following statements:
1417 ni_name = number of iterations loop executes
1418 ratio = ni_name / vf
1419 ratio_mult_vf_name = ratio * vf
1421 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1422 if that is non-NULL. */
1424 static void
1425 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1426 tree *ni_name_ptr,
1427 tree *ratio_mult_vf_name_ptr,
1428 tree *ratio_name_ptr,
1429 gimple_seq cond_expr_stmt_list)
1432 edge pe;
1433 basic_block new_bb;
1434 gimple_seq stmts;
1435 tree ni_name, ni_minus_gap_name;
1436 tree var;
1437 tree ratio_name;
1438 tree ratio_mult_vf_name;
1439 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1440 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1441 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1442 tree log_vf;
1444 pe = loop_preheader_edge (loop);
1446 /* Generate temporary variable that contains
1447 number of iterations loop executes. */
1449 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1450 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1452 /* If epilogue loop is required because of data accesses with gaps, we
1453 subtract one iteration from the total number of iterations here for
1454 correct calculation of RATIO. */
1455 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1457 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1458 ni_name,
1459 build_one_cst (TREE_TYPE (ni_name)));
1460 if (!is_gimple_val (ni_minus_gap_name))
1462 var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1464 stmts = NULL;
1465 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1466 true, var);
1467 if (cond_expr_stmt_list)
1468 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1469 else
1471 pe = loop_preheader_edge (loop);
1472 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1473 gcc_assert (!new_bb);
1477 else
1478 ni_minus_gap_name = ni_name;
1480 /* Create: ratio = ni >> log2(vf) */
1482 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1483 ni_minus_gap_name, log_vf);
1484 if (!is_gimple_val (ratio_name))
1486 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1488 stmts = NULL;
1489 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1490 if (cond_expr_stmt_list)
1491 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1492 else
1494 pe = loop_preheader_edge (loop);
1495 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1496 gcc_assert (!new_bb);
1500 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1502 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1503 ratio_name, log_vf);
1504 if (!is_gimple_val (ratio_mult_vf_name))
1506 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1508 stmts = NULL;
1509 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1510 true, var);
1511 if (cond_expr_stmt_list)
1512 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1513 else
1515 pe = loop_preheader_edge (loop);
1516 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1517 gcc_assert (!new_bb);
1521 *ni_name_ptr = ni_name;
1522 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1523 *ratio_name_ptr = ratio_name;
1525 return;
1528 /* Function vect_can_advance_ivs_p
1530 In case the number of iterations that LOOP iterates is unknown at compile
1531 time, an epilog loop will be generated, and the loop induction variables
1532 (IVs) will be "advanced" to the value they are supposed to take just before
1533 the epilog loop. Here we check that the access function of the loop IVs
1534 and the expression that represents the loop bound are simple enough.
1535 These restrictions will be relaxed in the future. */
1537 bool
1538 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1540 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1541 basic_block bb = loop->header;
1542 gimple phi;
1543 gimple_stmt_iterator gsi;
1545 /* Analyze phi functions of the loop header. */
1547 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:");
1549 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1551 tree access_fn = NULL;
1552 tree evolution_part;
1554 phi = gsi_stmt (gsi);
1555 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1558 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1561 /* Skip virtual phi's. The data dependences that are associated with
1562 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1564 if (virtual_operand_p (PHI_RESULT (phi)))
1566 if (dump_enabled_p ())
1567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1568 "virtual phi. skip.");
1569 continue;
1572 /* Skip reduction phis. */
1574 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1576 if (dump_enabled_p ())
1577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1578 "reduc phi. skip.");
1579 continue;
1582 /* Analyze the evolution function. */
1584 access_fn = instantiate_parameters
1585 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1587 if (!access_fn)
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1591 "No Access function.");
1592 return false;
1595 STRIP_NOPS (access_fn);
1596 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE, vect_location,
1599 "Access function of PHI: ");
1600 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
1603 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1605 if (evolution_part == NULL_TREE)
1607 if (dump_enabled_p ())
1608 dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution.");
1609 return false;
1612 /* FORNOW: We do not transform initial conditions of IVs
1613 which evolution functions are a polynomial of degree >= 2. */
1615 if (tree_is_chrec (evolution_part))
1616 return false;
1619 return true;
1623 /* Function vect_update_ivs_after_vectorizer.
1625 "Advance" the induction variables of LOOP to the value they should take
1626 after the execution of LOOP. This is currently necessary because the
1627 vectorizer does not handle induction variables that are used after the
1628 loop. Such a situation occurs when the last iterations of LOOP are
1629 peeled, because:
1630 1. We introduced new uses after LOOP for IVs that were not originally used
1631 after LOOP: the IVs of LOOP are now used by an epilog loop.
1632 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1633 times, whereas the loop IVs should be bumped N times.
1635 Input:
1636 - LOOP - a loop that is going to be vectorized. The last few iterations
1637 of LOOP were peeled.
1638 - NITERS - the number of iterations that LOOP executes (before it is
1639 vectorized). i.e, the number of times the ivs should be bumped.
1640 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1641 coming out from LOOP on which there are uses of the LOOP ivs
1642 (this is the path from LOOP->exit to epilog_loop->preheader).
1644 The new definitions of the ivs are placed in LOOP->exit.
1645 The phi args associated with the edge UPDATE_E in the bb
1646 UPDATE_E->dest are updated accordingly.
1648 Assumption 1: Like the rest of the vectorizer, this function assumes
1649 a single loop exit that has a single predecessor.
1651 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1652 organized in the same order.
1654 Assumption 3: The access function of the ivs is simple enough (see
1655 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1657 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1658 coming out of LOOP on which the ivs of LOOP are used (this is the path
1659 that leads to the epilog loop; other paths skip the epilog loop). This
1660 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1661 needs to have its phis updated.
1664 static void
1665 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1666 edge update_e)
1668 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1669 basic_block exit_bb = single_exit (loop)->dest;
1670 gimple phi, phi1;
1671 gimple_stmt_iterator gsi, gsi1;
1672 basic_block update_bb = update_e->dest;
1674 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1676 /* Make sure there exists a single-predecessor exit bb: */
1677 gcc_assert (single_pred_p (exit_bb));
/* Walk the header phis and the UPDATE_BB phis in lockstep; per
   Assumption 2 in the function comment they are organized in the same
   order, so GSI and GSI1 always refer to corresponding phis.  */
1679 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1680 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1681 gsi_next (&gsi), gsi_next (&gsi1))
1683 tree init_expr;
1684 tree step_expr, off;
1685 tree type;
1686 tree var, ni, ni_name;
1687 gimple_stmt_iterator last_gsi;
1688 stmt_vec_info stmt_info;
1690 phi = gsi_stmt (gsi);
1691 phi1 = gsi_stmt (gsi1);
1692 if (dump_enabled_p ())
1694 dump_printf_loc (MSG_NOTE, vect_location,
1695 "vect_update_ivs_after_vectorizer: phi: ");
1696 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1699 /* Skip virtual phi's. */
1700 if (virtual_operand_p (PHI_RESULT (phi)))
1702 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1704 "virtual phi. skip.");
1705 continue;
1708 /* Skip reduction phis. */
1709 stmt_info = vinfo_for_stmt (phi);
1710 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1712 if (dump_enabled_p ())
1713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1714 "reduc phi. skip.");
1715 continue;
1718 type = TREE_TYPE (gimple_phi_result (phi));
1719 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1720 step_expr = unshare_expr (step_expr);
1722 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1723 of degree >= 2 or exponential. */
1724 gcc_assert (!tree_is_chrec (step_expr));
1726 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
/* Compute ni = init + niters * step: the value the IV takes after
   NITERS iterations.  Pointer IVs use pointer-plus arithmetic.  */
1728 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1729 fold_convert (TREE_TYPE (step_expr), niters),
1730 step_expr);
1731 if (POINTER_TYPE_P (type))
1732 ni = fold_build_pointer_plus (init_expr, off);
1733 else
1734 ni = fold_build2 (PLUS_EXPR, type,
1735 init_expr, fold_convert (type, off));
1737 var = create_tmp_var (type, "tmp");
/* Materialize the new IV value at the end of the exit block, as the
   function comment promises ("placed in LOOP->exit").  */
1739 last_gsi = gsi_last_bb (exit_bb);
1740 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1741 true, GSI_SAME_STMT);
1743 /* Fix phi expressions in the successor bb. */
1744 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1748 /* Function vect_do_peeling_for_loop_bound
1750 Peel the last iterations of the loop represented by LOOP_VINFO.
1751 The peeled iterations form a new epilog loop. Given that the loop now
1752 iterates NITERS times, the new epilog loop iterates
1753 NITERS % VECTORIZATION_FACTOR times.
1755 The original loop will later be made to iterate
1756 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1758 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1759 test. */
1761 void
1762 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1763 unsigned int th, bool check_profitability)
1765 tree ni_name, ratio_mult_vf_name;
1766 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1767 struct loop *new_loop;
1768 edge update_e;
1769 basic_block preheader;
1770 int loop_num;
1771 int max_iter;
1772 tree cond_expr = NULL_TREE;
1773 gimple_seq cond_expr_stmt_list = NULL;
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1777 "=== vect_do_peeling_for_loop_bound ===");
1779 initialize_original_copy_tables ();
1781 /* Generate the following variables on the preheader of original loop:
1783 ni_name = number of iteration the original loop executes
1784 ratio = ni_name / vf
1785 ratio_mult_vf_name = ratio * vf */
1786 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1787 &ratio_mult_vf_name, ratio,
1788 cond_expr_stmt_list);
1790 loop_num = loop->num;
1792 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1793 &ratio_mult_vf_name, ni_name, false,
1794 th, check_profitability,
1795 cond_expr, cond_expr_stmt_list,
1796 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1797 gcc_assert (new_loop);
1798 gcc_assert (loop_num == loop->num);
1799 #ifdef ENABLE_CHECKING
1800 slpeel_verify_cfg_after_peeling (loop, new_loop);
1801 #endif
1803 /* A guard that controls whether the new_loop is to be executed or skipped
1804 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1805 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1806 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1807 is on the path where the LOOP IVs are used and need to be updated. */
1809 preheader = loop_preheader_edge (new_loop)->src;
1810 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1811 update_e = EDGE_PRED (preheader, 0);
1812 else
1813 update_e = EDGE_PRED (preheader, 1);
1815 /* Update IVs of original loop as if they were advanced
1816 by ratio_mult_vf_name steps. */
1817 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1819 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1820 and this means N-2 loopback edge executions.
1822 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1823 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1824 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1825 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1826 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1827 if (check_profitability)
1828 max_iter = MAX (max_iter, (int) th - 1);
1829 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1830 dump_printf (MSG_OPTIMIZED_LOCATIONS,
1831 "Setting upper bound of nb iterations for epilogue "
1832 "loop to %d\n", max_iter);
1834 /* After peeling we have to reset scalar evolution analyzer. */
1835 scev_reset ();
1837 free_original_copy_tables ();
1841 /* Function vect_gen_niters_for_prolog_loop
1843 Set the number of iterations for the loop represented by LOOP_VINFO
1844 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1845 and the misalignment of DR - the data reference recorded in
1846 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1847 this loop, the data reference DR will refer to an aligned location.
1849 The following computation is generated:
1851 If the misalignment of DR is known at compile time:
1852 addr_mis = int mis = DR_MISALIGNMENT (dr);
1853 Else, compute address misalignment in bytes:
1854 addr_mis = addr & (vectype_align - 1)
1856 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1858 (elem_size = element type size; an element is the scalar element whose type
1859 is the inner type of the vectype)
1861 When the step of the data-ref in the loop is not 1 (as in interleaved data
1862 and SLP), the number of iterations of the prolog must be divided by the step
1863 (which is equal to the size of interleaved group).
1865 The above formulas assume that VF == number of elements in the vector. This
1866 may not hold when there are multiple-types in the loop.
1867 In this case, for some data-references in the loop the VF does not represent
1868 the number of elements that fit in the vector. Therefore, instead of VF we
1869 use TYPE_VECTOR_SUBPARTS. */
1871 static tree
1872 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1874 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1875 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1876 tree var;
1877 gimple_seq stmts;
1878 tree iters, iters_name;
1879 edge pe;
1880 basic_block new_bb;
1881 gimple dr_stmt = DR_STMT (dr);
1882 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1883 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1884 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1885 tree niters_type = TREE_TYPE (loop_niters);
1886 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1888 pe = loop_preheader_edge (loop);
/* If the number of iterations to peel was determined at compile time,
   just use that constant.  */
1890 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1892 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1894 if (dump_enabled_p ())
1895 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1896 "known peeling = %d.", npeel);
1898 iters = build_int_cst (niters_type, npeel);
1899 *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
/* Otherwise compute the misalignment at run time from the address of DR
   (see the formula in the function comment above).  */
1901 else
1903 gimple_seq new_stmts = NULL;
1904 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1905 tree offset = negative
1906 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1907 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1908 &new_stmts, offset, loop);
1909 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1910 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1911 HOST_WIDE_INT elem_size =
1912 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1913 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1914 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1915 tree nelements_tree = build_int_cst (type, nelements);
1916 tree byte_misalign;
1917 tree elem_misalign;
1919 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1920 gcc_assert (!new_bb);
1922 /* Create: byte_misalign = addr & (vectype_align - 1) */
1923 byte_misalign =
1924 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1925 vectype_align_minus_1);
1927 /* Create: elem_misalign = byte_misalign / element_size */
1928 elem_misalign =
1929 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1931 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
1932 if (negative)
1933 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1934 else
1935 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1936 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1937 iters = fold_convert (niters_type, iters);
1938 *bound = nelements;
1941 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1942 /* If the loop bound is known at compile time we already verified that it is
1943 greater than vf; since the misalignment ('iters') is at most vf, there's
1944 no need to generate the MIN_EXPR in this case. */
1945 if (TREE_CODE (loop_niters) != INTEGER_CST)
1946 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1948 if (dump_enabled_p ())
1950 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1951 "niters for prolog loop: ");
1952 dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, iters);
1955 var = create_tmp_var (niters_type, "prolog_loop_niters");
1956 stmts = NULL;
1957 iters_name = force_gimple_operand (iters, &stmts, false, var);
1959 /* Insert stmt on loop preheader edge. */
1960 if (stmts)
1962 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1963 gcc_assert (!new_bb);
1966 return iters_name;
1970 /* Function vect_update_init_of_dr
1972 NITERS iterations were peeled from LOOP. DR represents a data reference
1973 in LOOP. This function updates the information recorded in DR to
1974 account for the fact that the first NITERS iterations had already been
1975 executed. Specifically, it updates the OFFSET field of DR. */
1977 static void
1978 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1980 tree offset = DR_OFFSET (dr);
1982 niters = fold_build2 (MULT_EXPR, sizetype,
1983 fold_convert (sizetype, niters),
1984 fold_convert (sizetype, DR_STEP (dr)));
1985 offset = fold_build2 (PLUS_EXPR, sizetype,
1986 fold_convert (sizetype, offset), niters);
1987 DR_OFFSET (dr) = offset;
1991 /* Function vect_update_inits_of_drs
1993 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1994 This function updates the information recorded for the data references in
1995 the loop to account for the fact that the first NITERS iterations had
1996 already been executed. Specifically, it updates the initial_condition of
1997 the access_function of all the data_references in the loop. */
1999 static void
2000 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2002 unsigned int i;
2003 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2004 struct data_reference *dr;
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2008 "=== vect_update_inits_of_dr ===");
2010 FOR_EACH_VEC_ELT (datarefs, i, dr)
2011 vect_update_init_of_dr (dr, niters);
2015 /* Function vect_do_peeling_for_alignment
2017 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2018 'niters' is set to the misalignment of one of the data references in the
2019 loop, thereby forcing it to refer to an aligned location at the beginning
2020 of the execution of this loop. The data reference for which we are
2021 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2023 void
2024 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
2025 unsigned int th, bool check_profitability)
2027 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2028 tree niters_of_prolog_loop, ni_name;
2029 tree n_iters;
2030 tree wide_prolog_niters;
2031 struct loop *new_loop;
2032 int max_iter;
2033 int bound = 0;
2035 if (dump_enabled_p ())
2036 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2037 "=== vect_do_peeling_for_alignment ===");
2039 initialize_original_copy_tables ();
2041 ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2042 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2043 ni_name,
2044 &bound);
2046 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2047 new_loop =
2048 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2049 &niters_of_prolog_loop, ni_name, true,
2050 th, check_profitability, NULL_TREE, NULL,
2051 bound,
2054 gcc_assert (new_loop);
2055 #ifdef ENABLE_CHECKING
2056 slpeel_verify_cfg_after_peeling (new_loop, loop);
2057 #endif
2058 /* For vectorization factor N, we need to copy at most N-1 values
2059 for alignment and this means N-2 loopback edge executions. */
2060 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2061 if (check_profitability)
2062 max_iter = MAX (max_iter, (int) th - 1);
2063 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
2064 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2065 "Setting upper bound of nb iterations for prologue "
2066 "loop to %d\n", max_iter);
2068 /* Update number of times loop executes. */
2069 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2070 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2071 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2073 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2074 wide_prolog_niters = niters_of_prolog_loop;
2075 else
2077 gimple_seq seq = NULL;
2078 edge pe = loop_preheader_edge (loop);
2079 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2080 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2081 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2082 var);
2083 if (seq)
2085 /* Insert stmt on loop preheader edge. */
2086 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2087 gcc_assert (!new_bb);
2091 /* Update the init conditions of the access functions of all data refs. */
2092 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2094 /* After peeling we have to reset scalar evolution analyzer. */
2095 scev_reset ();
2097 free_original_copy_tables ();
2101 /* Function vect_create_cond_for_align_checks.
2103 Create a conditional expression that represents the alignment checks for
2104 all of data references (array element references) whose alignment must be
2105 checked at runtime.
2107 Input:
2108 COND_EXPR - input conditional expression. New conditions will be chained
2109 with logical AND operation.
2110 LOOP_VINFO - two fields of the loop information are used.
2111 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2112 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2114 Output:
2115 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2116 expression.
2117 The returned value is the conditional expression to be used in the if
2118 statement that controls which version of the loop gets executed at runtime.
2120 The algorithm makes two assumptions:
2121 1) The number of bytes "n" in a vector is a power of 2.
2122 2) An address "a" is aligned if a%n is zero and that this
2123 test can be done as a&(n-1) == 0. For example, for 16
2124 byte vectors the test is a&0xf == 0. */
2126 static void
2127 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2128 tree *cond_expr,
2129 gimple_seq *cond_expr_stmt_list)
2131 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2132 vec<gimple> may_misalign_stmts
2133 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2134 gimple ref_stmt;
2135 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2136 tree mask_cst;
2137 unsigned int i;
2138 tree int_ptrsize_type;
2139 char tmp_name[20];
2140 tree or_tmp_name = NULL_TREE;
2141 tree and_tmp_name;
2142 gimple and_stmt;
2143 tree ptrsize_zero;
2144 tree part_cond_expr;
2146 /* Check that mask is one less than a power of 2, i.e., mask is
2147 all zeros followed by all ones. */
2148 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2150 int_ptrsize_type = signed_type_for (ptr_type_node);
2152 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2153 of the first vector of the i'th data reference. */
2155 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2157 gimple_seq new_stmt_list = NULL;
2158 tree addr_base;
2159 tree addr_tmp_name;
2160 tree new_or_tmp_name;
2161 gimple addr_stmt, or_stmt;
2162 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2163 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2164 bool negative = tree_int_cst_compare
2165 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2166 tree offset = negative
2167 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2169 /* create: addr_tmp = (int)(address_of_first_vector) */
2170 addr_base =
2171 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2172 offset, loop);
2173 if (new_stmt_list != NULL)
2174 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2176 sprintf (tmp_name, "addr2int%d", i);
2177 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2178 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2179 addr_base, NULL_TREE);
2180 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2182 /* The addresses are OR together. */
2184 if (or_tmp_name != NULL_TREE)
2186 /* create: or_tmp = or_tmp | addr_tmp */
2187 sprintf (tmp_name, "orptrs%d", i);
2188 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2189 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2190 new_or_tmp_name,
2191 or_tmp_name, addr_tmp_name);
2192 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2193 or_tmp_name = new_or_tmp_name;
2195 else
2196 or_tmp_name = addr_tmp_name;
2198 } /* end for i */
2200 mask_cst = build_int_cst (int_ptrsize_type, mask);
2202 /* create: and_tmp = or_tmp & mask */
2203 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2205 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2206 or_tmp_name, mask_cst);
2207 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2209 /* Make and_tmp the left operand of the conditional test against zero.
2210 if and_tmp has a nonzero bit then some address is unaligned. */
2211 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2212 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2213 and_tmp_name, ptrsize_zero);
2214 if (*cond_expr)
2215 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2216 *cond_expr, part_cond_expr);
2217 else
2218 *cond_expr = part_cond_expr;
2222 /* Function vect_vfa_segment_size.
2224 Create an expression that computes the size of segment
2225 that will be accessed for a data reference. The functions takes into
2226 account that realignment loads may access one more vector.
2228 Input:
2229 DR: The data reference.
2230 LENGTH_FACTOR: segment length to consider.
2232 Return an expression whose value is the size of segment which will be
2233 accessed by DR. */
2235 static tree
2236 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2238 tree segment_length;
2240 if (integer_zerop (DR_STEP (dr)))
2241 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2242 else
2243 segment_length = size_binop (MULT_EXPR,
2244 fold_convert (sizetype, DR_STEP (dr)),
2245 fold_convert (sizetype, length_factor));
2247 if (vect_supportable_dr_alignment (dr, false)
2248 == dr_explicit_realign_optimized)
2250 tree vector_size = TYPE_SIZE_UNIT
2251 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2253 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2255 return segment_length;
2259 /* Function vect_create_cond_for_alias_checks.
2261 Create a conditional expression that represents the run-time checks for
2262 overlapping of address ranges represented by a list of data references
2263 relations passed as input.
2265 Input:
2266 COND_EXPR - input conditional expression. New conditions will be chained
2267 with logical AND operation.
2268 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2269 to be checked.
2271 Output:
2272 COND_EXPR - conditional expression.
2273 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2274 expression.
2277 The returned value is the conditional expression to be used in the if
2278 statement that controls which version of the loop gets executed at runtime.
2281 static void
2282 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2283 tree * cond_expr,
2284 gimple_seq * cond_expr_stmt_list)
2286 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2287 vec<ddr_p> may_alias_ddrs =
2288 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2289 int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2290 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2292 ddr_p ddr;
2293 unsigned int i;
2294 tree part_cond_expr, length_factor;
2296 /* Create expression
2297 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2298 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2302 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2303 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
2305 if (may_alias_ddrs.is_empty ())
2306 return;
2308 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
2310 struct data_reference *dr_a, *dr_b;
2311 gimple dr_group_first_a, dr_group_first_b;
2312 tree addr_base_a, addr_base_b;
2313 tree segment_length_a, segment_length_b;
2314 gimple stmt_a, stmt_b;
2315 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2317 dr_a = DDR_A (ddr);
2318 stmt_a = DR_STMT (DDR_A (ddr));
2319 dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2320 if (dr_group_first_a)
2322 stmt_a = dr_group_first_a;
2323 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2326 dr_b = DDR_B (ddr);
2327 stmt_b = DR_STMT (DDR_B (ddr));
2328 dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2329 if (dr_group_first_b)
2331 stmt_b = dr_group_first_b;
2332 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2335 addr_base_a =
2336 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2337 NULL_TREE, loop);
2338 addr_base_b =
2339 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2340 NULL_TREE, loop);
2342 if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2343 length_factor = scalar_loop_iters;
2344 else
2345 length_factor = size_int (vect_factor);
2346 segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2347 segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2349 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2352 "create runtime check for data references ");
2353 dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_a));
2354 dump_printf (MSG_OPTIMIZED_LOCATIONS, " and ");
2355 dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_b));
2358 seg_a_min = addr_base_a;
2359 seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2360 if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2361 seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2363 seg_b_min = addr_base_b;
2364 seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2365 if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2366 seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2368 part_cond_expr =
2369 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2370 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2371 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2373 if (*cond_expr)
2374 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2375 *cond_expr, part_cond_expr);
2376 else
2377 *cond_expr = part_cond_expr;
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2382 "created %u versioning for alias checks.\n",
2383 may_alias_ddrs.length ());
2387 /* Function vect_loop_versioning.
2389 If the loop has data references that may or may not be aligned or/and
2390 has data reference relations whose independence was not proven then
2391 two versions of the loop need to be generated, one which is vectorized
2392 and one which isn't. A test is then generated to control which of the
2393 loops is executed. The test checks for the alignment of all of the
2394 data references that may or may not be aligned. An additional
2395 sequence of runtime tests is generated for each pairs of DDRs whose
2396 independence was not proven. The vectorized version of loop is
2397 executed only if both alias and alignment tests are passed.
2399 The test generated to check which version of loop is executed
2400 is modified to also check for profitability as indicated by the
2401 cost model initially.
2403 The versioning precondition(s) are placed in *COND_EXPR and
2404 *COND_EXPR_STMT_LIST. */
2406 void
2407 vect_loop_versioning (loop_vec_info loop_vinfo,
2408 unsigned int th, bool check_profitability)
2410 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2411 basic_block condition_bb;
2412 gimple_stmt_iterator gsi, cond_exp_gsi;
2413 basic_block merge_bb;
2414 basic_block new_exit_bb;
2415 edge new_exit_e, e;
2416 gimple orig_phi, new_phi;
2417 tree cond_expr = NULL_TREE;
2418 gimple_seq cond_expr_stmt_list = NULL;
2419 tree arg;
2420 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2421 gimple_seq gimplify_stmt_list = NULL;
2422 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2424 if (check_profitability)
2426 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2427 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2428 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2429 is_gimple_condexpr, NULL_TREE);
2432 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2433 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2434 &cond_expr_stmt_list);
2436 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2437 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
2438 &cond_expr_stmt_list);
2440 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2441 is_gimple_condexpr, NULL_TREE);
2442 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2444 initialize_original_copy_tables ();
2445 loop_version (loop, cond_expr, &condition_bb,
2446 prob, prob, REG_BR_PROB_BASE - prob, true);
2447 free_original_copy_tables();
2449 /* Loop versioning violates an assumption we try to maintain during
2450 vectorization - that the loop exit block has a single predecessor.
2451 After versioning, the exit block of both loop versions is the same
2452 basic block (i.e. it has two predecessors). Just in order to simplify
2453 following transformations in the vectorizer, we fix this situation
2454 here by adding a new (empty) block on the exit-edge of the loop,
2455 with the proper loop-exit phis to maintain loop-closed-form. */
2457 merge_bb = single_exit (loop)->dest;
2458 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2459 new_exit_bb = split_edge (single_exit (loop));
2460 new_exit_e = single_exit (loop);
2461 e = EDGE_SUCC (new_exit_bb, 0);
2463 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2465 tree new_res;
2466 orig_phi = gsi_stmt (gsi);
2467 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2468 new_phi = create_phi_node (new_res, new_exit_bb);
2469 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2470 add_phi_arg (new_phi, arg, new_exit_e,
2471 gimple_phi_arg_location_from_edge (orig_phi, e));
2472 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2475 /* End loop-exit-fixes after versioning. */
2477 update_ssa (TODO_update_ssa);
2478 if (cond_expr_stmt_list)
2480 cond_exp_gsi = gsi_last_bb (condition_bb);
2481 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2482 GSI_SAME_STMT);