2013-01-12 Janus Weil <janus@gcc.gnu.org>
[official-gcc.git] / gcc / tree-vect-loop-manip.c
blob6b6e130159e77dfcde6f8180974a176c843a2e36
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-pass.h"
33 #include "cfgloop.h"
34 #include "diagnostic-core.h"
35 #include "tree-scalar-evolution.h"
36 #include "tree-vectorizer.h"
37 #include "langhooks.h"
39 /*************************************************************************
40 Simple Loop Peeling Utilities
42 Utilities to support loop peeling for vectorization purposes.
43 *************************************************************************/
46 /* Renames the use *OP_P. */
48 static void
49 rename_use_op (use_operand_p op_p)
51 tree new_name;
53 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
54 return;
56 new_name = get_current_def (USE_FROM_PTR (op_p));
58 /* Something defined outside of the loop. */
59 if (!new_name)
60 return;
62 /* An ordinary ssa name defined in the loop. */
64 SET_USE (op_p, new_name);
68 /* Renames the variables in basic block BB. */
70 void
71 rename_variables_in_bb (basic_block bb)
73 gimple_stmt_iterator gsi;
74 gimple stmt;
75 use_operand_p use_p;
76 ssa_op_iter iter;
77 edge e;
78 edge_iterator ei;
79 struct loop *loop = bb->loop_father;
81 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
83 stmt = gsi_stmt (gsi);
84 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
85 rename_use_op (use_p);
88 FOR_EACH_EDGE (e, ei, bb->succs)
90 if (!flow_bb_inside_loop_p (loop, e->dest))
91 continue;
92 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
93 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
98 /* Renames variables in new generated LOOP. */
100 void
101 rename_variables_in_loop (struct loop *loop)
103 unsigned i;
104 basic_block *bbs;
106 bbs = get_loop_body (loop);
108 for (i = 0; i < loop->num_nodes; i++)
109 rename_variables_in_bb (bbs[i]);
111 free (bbs);
114 typedef struct
116 tree from, to;
117 basic_block bb;
118 } adjust_info;
120 /* A stack of values to be adjusted in debug stmts. We have to
121 process them LIFO, so that the closest substitution applies. If we
122 processed them FIFO, without the stack, we might substitute uses
123 with a PHI DEF that would soon become non-dominant, and when we got
124 to the suitable one, it wouldn't have anything to substitute any
125 more. */
126 static vec<adjust_info, va_stack> adjust_vec;
128 /* Adjust any debug stmts that referenced AI->from values to use the
129 loop-closed AI->to, if the references are dominated by AI->bb and
130 not by the definition of AI->from. */
132 static void
133 adjust_debug_stmts_now (adjust_info *ai)
135 basic_block bbphi = ai->bb;
136 tree orig_def = ai->from;
137 tree new_def = ai->to;
138 imm_use_iterator imm_iter;
139 gimple stmt;
140 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
142 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
144 /* Adjust any debug stmts that held onto non-loop-closed
145 references. */
146 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
148 use_operand_p use_p;
149 basic_block bbuse;
151 if (!is_gimple_debug (stmt))
152 continue;
154 gcc_assert (gimple_debug_bind_p (stmt));
156 bbuse = gimple_bb (stmt);
158 if ((bbuse == bbphi
159 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
160 && !(bbuse == bbdef
161 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
163 if (new_def)
164 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
165 SET_USE (use_p, new_def);
166 else
168 gimple_debug_bind_reset_value (stmt);
169 update_stmt (stmt);
175 /* Adjust debug stmts as scheduled before. */
177 static void
178 adjust_vec_debug_stmts (void)
180 if (!MAY_HAVE_DEBUG_STMTS)
181 return;
183 gcc_assert (adjust_vec.exists ());
185 while (!adjust_vec.is_empty ())
187 adjust_debug_stmts_now (&adjust_vec.last ());
188 adjust_vec.pop ();
191 adjust_vec.release ();
194 /* Adjust any debug stmts that referenced FROM values to use the
195 loop-closed TO, if the references are dominated by BB and not by
196 the definition of FROM. If adjust_vec is non-NULL, adjustments
197 will be postponed until adjust_vec_debug_stmts is called. */
199 static void
200 adjust_debug_stmts (tree from, tree to, basic_block bb)
202 adjust_info ai;
204 if (MAY_HAVE_DEBUG_STMTS
205 && TREE_CODE (from) == SSA_NAME
206 && ! virtual_operand_p (from))
208 ai.from = from;
209 ai.to = to;
210 ai.bb = bb;
212 if (adjust_vec.exists ())
213 adjust_vec.safe_push (ai);
214 else
215 adjust_debug_stmts_now (&ai);
219 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
220 to adjust any debug stmts that referenced the old phi arg,
221 presumably non-loop-closed references left over from other
222 transformations. */
224 static void
225 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
227 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
229 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
231 if (MAY_HAVE_DEBUG_STMTS)
232 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
233 gimple_bb (update_phi));
237 /* Update the PHI nodes of NEW_LOOP.
239 NEW_LOOP is a duplicate of ORIG_LOOP.
240 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
241 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
242 executes before it. */
244 static void
245 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
246 struct loop *new_loop, bool after)
248 tree new_ssa_name;
249 gimple phi_new, phi_orig;
250 tree def;
251 edge orig_loop_latch = loop_latch_edge (orig_loop);
252 edge orig_entry_e = loop_preheader_edge (orig_loop);
253 edge new_loop_exit_e = single_exit (new_loop);
254 edge new_loop_entry_e = loop_preheader_edge (new_loop);
255 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
256 gimple_stmt_iterator gsi_new, gsi_orig;
259 step 1. For each loop-header-phi:
260 Add the first phi argument for the phi in NEW_LOOP
261 (the one associated with the entry of NEW_LOOP)
263 step 2. For each loop-header-phi:
264 Add the second phi argument for the phi in NEW_LOOP
265 (the one associated with the latch of NEW_LOOP)
267 step 3. Update the phis in the successor block of NEW_LOOP.
269 case 1: NEW_LOOP was placed before ORIG_LOOP:
270 The successor block of NEW_LOOP is the header of ORIG_LOOP.
271 Updating the phis in the successor block can therefore be done
272 along with the scanning of the loop header phis, because the
273 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
274 phi nodes, organized in the same order.
276 case 2: NEW_LOOP was placed after ORIG_LOOP:
277 The successor block of NEW_LOOP is the original exit block of
278 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
279 We postpone updating these phis to a later stage (when
280 loop guards are added).
284 /* Scan the phis in the headers of the old and new loops
285 (they are organized in exactly the same order). */
287 for (gsi_new = gsi_start_phis (new_loop->header),
288 gsi_orig = gsi_start_phis (orig_loop->header);
289 !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
290 gsi_next (&gsi_new), gsi_next (&gsi_orig))
292 source_location locus;
293 phi_new = gsi_stmt (gsi_new);
294 phi_orig = gsi_stmt (gsi_orig);
296 /* step 1. */
297 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
298 locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
299 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
301 /* step 2. */
302 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
303 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
304 if (TREE_CODE (def) != SSA_NAME)
305 continue;
307 new_ssa_name = get_current_def (def);
308 if (!new_ssa_name)
310 /* This only happens if there are no definitions
311 inside the loop. use the phi_result in this case. */
312 new_ssa_name = PHI_RESULT (phi_new);
315 /* An ordinary ssa name defined in the loop. */
316 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
318 /* Drop any debug references outside the loop, if they would
319 become ill-formed SSA. */
320 adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
322 /* step 3 (case 1). */
323 if (!after)
325 gcc_assert (new_loop_exit_e == orig_entry_e);
326 adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
332 /* Update PHI nodes for a guard of the LOOP.
334 Input:
335 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
336 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
337 originates from the guard-bb, skips LOOP and reaches the (unique) exit
338 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
339 We denote this bb NEW_MERGE_BB because before the guard code was added
340 it had a single predecessor (the LOOP header), and now it became a merge
341 point of two paths - the path that ends with the LOOP exit-edge, and
342 the path that ends with GUARD_EDGE.
343 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
344 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
346 ===> The CFG before the guard-code was added:
347 LOOP_header_bb:
348 loop_body
349 if (exit_loop) goto update_bb
350 else goto LOOP_header_bb
351 update_bb:
353 ==> The CFG after the guard-code was added:
354 guard_bb:
355 if (LOOP_guard_condition) goto new_merge_bb
356 else goto LOOP_header_bb
357 LOOP_header_bb:
358 loop_body
359 if (exit_loop_condition) goto new_merge_bb
360 else goto LOOP_header_bb
361 new_merge_bb:
362 goto update_bb
363 update_bb:
365 ==> The CFG after this function:
366 guard_bb:
367 if (LOOP_guard_condition) goto new_merge_bb
368 else goto LOOP_header_bb
369 LOOP_header_bb:
370 loop_body
371 if (exit_loop_condition) goto new_exit_bb
372 else goto LOOP_header_bb
373 new_exit_bb:
374 new_merge_bb:
375 goto update_bb
376 update_bb:
378 This function:
379 1. creates and updates the relevant phi nodes to account for the new
380 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
381 1.1. Create phi nodes at NEW_MERGE_BB.
382 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
383 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
384 2. preserves loop-closed-ssa-form by creating the required phi nodes
385 at the exit of LOOP (i.e, in NEW_EXIT_BB).
387 There are two flavors to this function:
389 slpeel_update_phi_nodes_for_guard1:
390 Here the guard controls whether we enter or skip LOOP, where LOOP is a
391 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
392 for variables that have phis in the loop header.
394 slpeel_update_phi_nodes_for_guard2:
395 Here the guard controls whether we enter or skip LOOP, where LOOP is an
396 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
397 for variables that have phis in the loop exit.
399 I.E., the overall structure is:
401 loop1_preheader_bb:
402 guard1 (goto loop1/merge1_bb)
403 loop1
404 loop1_exit_bb:
405 guard2 (goto merge1_bb/merge2_bb)
406 merge1_bb
407 loop2
408 loop2_exit_bb
409 merge2_bb
410 next_bb
412 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
413 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
414 that have phis in loop1->header).
416 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
417 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
418 that have phis in next_bb). It also adds some of these phis to
419 loop1_exit_bb.
421 slpeel_update_phi_nodes_for_guard1 is always called before
422 slpeel_update_phi_nodes_for_guard2. They are both needed in order
423 to create correct data-flow and loop-closed-ssa-form.
425 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
426 that change between iterations of a loop (and therefore have a phi-node
427 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
428 phis for variables that are used out of the loop (and therefore have
429 loop-closed exit phis). Some variables may be both updated between
430 iterations and used after the loop. This is why in loop1_exit_bb we
431 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
432 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
434 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
435 an original loop. i.e., we have:
437 orig_loop
438 guard_bb (goto LOOP/new_merge)
439 new_loop <-- LOOP
440 new_exit
441 new_merge
442 next_bb
444 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
445 have:
447 new_loop
448 guard_bb (goto LOOP/new_merge)
449 orig_loop <-- LOOP
450 new_exit
451 new_merge
452 next_bb
454    The SSA names defined in the original loop have a current
455    reaching definition that records the corresponding new
456    ssa-name used in the new duplicated loop copy.
459 /* Function slpeel_update_phi_nodes_for_guard1
461 Input:
462 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
463 - DEFS - a bitmap of ssa names to mark new names for which we recorded
464 information.
466 In the context of the overall structure, we have:
468 loop1_preheader_bb:
469 guard1 (goto loop1/merge1_bb)
470 LOOP-> loop1
471 loop1_exit_bb:
472 guard2 (goto merge1_bb/merge2_bb)
473 merge1_bb
474 loop2
475 loop2_exit_bb
476 merge2_bb
477 next_bb
479 For each name updated between loop iterations (i.e - for each name that has
480 an entry (loop-header) phi in LOOP) we create a new phi in:
481 1. merge1_bb (to account for the edge from guard1)
482 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
485 static void
486 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
487 bool is_new_loop, basic_block *new_exit_bb)
489 gimple orig_phi, new_phi;
490 gimple update_phi, update_phi2;
491 tree guard_arg, loop_arg;
492 basic_block new_merge_bb = guard_edge->dest;
493 edge e = EDGE_SUCC (new_merge_bb, 0);
494 basic_block update_bb = e->dest;
495 basic_block orig_bb = loop->header;
496 edge new_exit_e;
497 tree current_new_name;
498 gimple_stmt_iterator gsi_orig, gsi_update;
500 /* Create new bb between loop and new_merge_bb. */
501 *new_exit_bb = split_edge (single_exit (loop));
503 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
505 for (gsi_orig = gsi_start_phis (orig_bb),
506 gsi_update = gsi_start_phis (update_bb);
507 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
508 gsi_next (&gsi_orig), gsi_next (&gsi_update))
510 source_location loop_locus, guard_locus;
511 tree new_res;
512 orig_phi = gsi_stmt (gsi_orig);
513 update_phi = gsi_stmt (gsi_update);
515 /** 1. Handle new-merge-point phis **/
517 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
518 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
519 new_phi = create_phi_node (new_res, new_merge_bb);
521 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
522 of LOOP. Set the two phi args in NEW_PHI for these edges: */
523 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
524 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
525 EDGE_SUCC (loop->latch,
526 0));
527 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
528 guard_locus
529 = gimple_phi_arg_location_from_edge (orig_phi,
530 loop_preheader_edge (loop));
532 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
533 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
535 /* 1.3. Update phi in successor block. */
536 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
537 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
538 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
539 update_phi2 = new_phi;
542 /** 2. Handle loop-closed-ssa-form phis **/
544 if (virtual_operand_p (PHI_RESULT (orig_phi)))
545 continue;
547 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
548 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
549 new_phi = create_phi_node (new_res, *new_exit_bb);
551 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
552 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
554 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
555 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
556 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
557 PHI_RESULT (new_phi));
559 /* 2.4. Record the newly created name with set_current_def.
560 We want to find a name such that
561 name = get_current_def (orig_loop_name)
562 and to set its current definition as follows:
563 set_current_def (name, new_phi_name)
565 If LOOP is a new loop then loop_arg is already the name we're
566 looking for. If LOOP is the original loop, then loop_arg is
567 the orig_loop_name and the relevant name is recorded in its
568 current reaching definition. */
569 if (is_new_loop)
570 current_new_name = loop_arg;
571 else
573 current_new_name = get_current_def (loop_arg);
574 /* current_def is not available only if the variable does not
575 change inside the loop, in which case we also don't care
576 about recording a current_def for it because we won't be
577 trying to create loop-exit-phis for it. */
578 if (!current_new_name)
579 continue;
581 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
583 set_current_def (current_new_name, PHI_RESULT (new_phi));
588 /* Function slpeel_update_phi_nodes_for_guard2
590 Input:
591 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
593 In the context of the overall structure, we have:
595 loop1_preheader_bb:
596 guard1 (goto loop1/merge1_bb)
597 loop1
598 loop1_exit_bb:
599 guard2 (goto merge1_bb/merge2_bb)
600 merge1_bb
601 LOOP-> loop2
602 loop2_exit_bb
603 merge2_bb
604 next_bb
606 For each name used out side the loop (i.e - for each name that has an exit
607 phi in next_bb) we create a new phi in:
608 1. merge2_bb (to account for the edge from guard_bb)
609 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
610 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
611 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
614 static void
615 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
616 bool is_new_loop, basic_block *new_exit_bb)
618 gimple orig_phi, new_phi;
619 gimple update_phi, update_phi2;
620 tree guard_arg, loop_arg;
621 basic_block new_merge_bb = guard_edge->dest;
622 edge e = EDGE_SUCC (new_merge_bb, 0);
623 basic_block update_bb = e->dest;
624 edge new_exit_e;
625 tree orig_def, orig_def_new_name;
626 tree new_name, new_name2;
627 tree arg;
628 gimple_stmt_iterator gsi;
630 /* Create new bb between loop and new_merge_bb. */
631 *new_exit_bb = split_edge (single_exit (loop));
633 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
635 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
637 tree new_res;
638 update_phi = gsi_stmt (gsi);
639 orig_phi = update_phi;
640 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
641 /* This loop-closed-phi actually doesn't represent a use
642 out of the loop - the phi arg is a constant. */
643 if (TREE_CODE (orig_def) != SSA_NAME)
644 continue;
645 orig_def_new_name = get_current_def (orig_def);
646 arg = NULL_TREE;
648 /** 1. Handle new-merge-point phis **/
650 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
651 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
652 new_phi = create_phi_node (new_res, new_merge_bb);
654 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
655 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
656 new_name = orig_def;
657 new_name2 = NULL_TREE;
658 if (orig_def_new_name)
660 new_name = orig_def_new_name;
661 /* Some variables have both loop-entry-phis and loop-exit-phis.
662 Such variables were given yet newer names by phis placed in
663 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
664 new_name2 = get_current_def (get_current_def (orig_name)). */
665 new_name2 = get_current_def (new_name);
668 if (is_new_loop)
670 guard_arg = orig_def;
671 loop_arg = new_name;
673 else
675 guard_arg = new_name;
676 loop_arg = orig_def;
678 if (new_name2)
679 guard_arg = new_name2;
681 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
682 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
684 /* 1.3. Update phi in successor block. */
685 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
686 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
687 update_phi2 = new_phi;
690 /** 2. Handle loop-closed-ssa-form phis **/
692 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
693 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
694 new_phi = create_phi_node (new_res, *new_exit_bb);
696 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
697 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
699 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
700 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
701 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
702 PHI_RESULT (new_phi));
705 /** 3. Handle loop-closed-ssa-form phis for first loop **/
707 /* 3.1. Find the relevant names that need an exit-phi in
708 GUARD_BB, i.e. names for which
709 slpeel_update_phi_nodes_for_guard1 had not already created a
710 phi node. This is the case for names that are used outside
711 the loop (and therefore need an exit phi) but are not updated
712 across loop iterations (and therefore don't have a
713 loop-header-phi).
715 slpeel_update_phi_nodes_for_guard1 is responsible for
716 creating loop-exit phis in GUARD_BB for names that have a
717 loop-header-phi. When such a phi is created we also record
718 the new name in its current definition. If this new name
719 exists, then guard_arg was set to this new name (see 1.2
720 above). Therefore, if guard_arg is not this new name, this
721 is an indication that an exit-phi in GUARD_BB was not yet
722 created, so we take care of it here. */
723 if (guard_arg == new_name2)
724 continue;
725 arg = guard_arg;
727 /* 3.2. Generate new phi node in GUARD_BB: */
728 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
729 new_phi = create_phi_node (new_res, guard_edge->src);
731 /* 3.3. GUARD_BB has one incoming edge: */
732 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
733 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
734 UNKNOWN_LOCATION);
736 /* 3.4. Update phi in successor of GUARD_BB: */
737 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
738 == guard_arg);
739 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
740 PHI_RESULT (new_phi));
745 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
746 that starts at zero, increases by one and its limit is NITERS.
748 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
750 void
751 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
753 tree indx_before_incr, indx_after_incr;
754 gimple cond_stmt;
755 gimple orig_cond;
756 edge exit_edge = single_exit (loop);
757 gimple_stmt_iterator loop_cond_gsi;
758 gimple_stmt_iterator incr_gsi;
759 bool insert_after;
760 tree init = build_int_cst (TREE_TYPE (niters), 0);
761 tree step = build_int_cst (TREE_TYPE (niters), 1);
762 LOC loop_loc;
763 enum tree_code code;
765 orig_cond = get_loop_exit_condition (loop);
766 gcc_assert (orig_cond);
767 loop_cond_gsi = gsi_for_stmt (orig_cond);
769 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
770 create_iv (init, step, NULL_TREE, loop,
771 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
773 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
774 true, NULL_TREE, true,
775 GSI_SAME_STMT);
776 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
777 true, GSI_SAME_STMT);
779 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
780 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
781 NULL_TREE);
783 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
785 /* Remove old loop exit test: */
786 gsi_remove (&loop_cond_gsi, true);
787 free_stmt_vec_info (orig_cond);
789 loop_loc = find_loop_location (loop);
790 if (dump_enabled_p ())
792 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOC)
793 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOC_FILE (loop_loc),
794 LOC_LINE (loop_loc));
795 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
797 loop->nb_iterations = niters;
801 /* Given LOOP this function generates a new copy of it and puts it
802 on E which is either the entry or exit of LOOP. */
804 struct loop *
805 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
807 struct loop *new_loop;
808 basic_block *new_bbs, *bbs;
809 bool at_exit;
810 bool was_imm_dom;
811 basic_block exit_dest;
812 gimple phi;
813 tree phi_arg;
814 edge exit, new_exit;
815 gimple_stmt_iterator gsi;
817 at_exit = (e == single_exit (loop));
818 if (!at_exit && e != loop_preheader_edge (loop))
819 return NULL;
821 bbs = get_loop_body (loop);
823 /* Check whether duplication is possible. */
824 if (!can_copy_bbs_p (bbs, loop->num_nodes))
826 free (bbs);
827 return NULL;
830 /* Generate new loop structure. */
831 new_loop = duplicate_loop (loop, loop_outer (loop));
832 if (!new_loop)
834 free (bbs);
835 return NULL;
838 exit_dest = single_exit (loop)->dest;
839 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
840 exit_dest) == loop->header ?
841 true : false);
843 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
845 exit = single_exit (loop);
846 copy_bbs (bbs, loop->num_nodes, new_bbs,
847 &exit, 1, &new_exit, NULL,
848 e->src);
850 /* Duplicating phi args at exit bbs as coming
851 also from exit of duplicated loop. */
852 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
854 phi = gsi_stmt (gsi);
855 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
856 if (phi_arg)
858 edge new_loop_exit_edge;
859 source_location locus;
861 locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
862 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
863 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
864 else
865 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
867 add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
871 if (at_exit) /* Add the loop copy at exit. */
873 redirect_edge_and_branch_force (e, new_loop->header);
874 PENDING_STMT (e) = NULL;
875 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
876 if (was_imm_dom)
877 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
879 else /* Add the copy at entry. */
881 edge new_exit_e;
882 edge entry_e = loop_preheader_edge (loop);
883 basic_block preheader = entry_e->src;
885 if (!flow_bb_inside_loop_p (new_loop,
886 EDGE_SUCC (new_loop->header, 0)->dest))
887 new_exit_e = EDGE_SUCC (new_loop->header, 0);
888 else
889 new_exit_e = EDGE_SUCC (new_loop->header, 1);
891 redirect_edge_and_branch_force (new_exit_e, loop->header);
892 PENDING_STMT (new_exit_e) = NULL;
893 set_immediate_dominator (CDI_DOMINATORS, loop->header,
894 new_exit_e->src);
896 /* We have to add phi args to the loop->header here as coming
897 from new_exit_e edge. */
898 for (gsi = gsi_start_phis (loop->header);
899 !gsi_end_p (gsi);
900 gsi_next (&gsi))
902 phi = gsi_stmt (gsi);
903 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
904 if (phi_arg)
905 add_phi_arg (phi, phi_arg, new_exit_e,
906 gimple_phi_arg_location_from_edge (phi, entry_e));
909 redirect_edge_and_branch_force (entry_e, new_loop->header);
910 PENDING_STMT (entry_e) = NULL;
911 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
914 free (new_bbs);
915 free (bbs);
917 return new_loop;
921 /* Given the condition statement COND, put it as the last statement
922 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
923 Assumes that this is the single exit of the guarded loop.
924 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
926 static edge
927 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
928 gimple_seq cond_expr_stmt_list,
929 basic_block exit_bb, basic_block dom_bb,
930 int probability)
932 gimple_stmt_iterator gsi;
933 edge new_e, enter_e;
934 gimple cond_stmt;
935 gimple_seq gimplify_stmt_list = NULL;
937 enter_e = EDGE_SUCC (guard_bb, 0);
938 enter_e->flags &= ~EDGE_FALLTHRU;
939 enter_e->flags |= EDGE_FALSE_VALUE;
940 gsi = gsi_last_bb (guard_bb);
942 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
943 NULL_TREE);
944 if (gimplify_stmt_list)
945 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
946 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
947 if (cond_expr_stmt_list)
948 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
950 gsi = gsi_last_bb (guard_bb);
951 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
953 /* Add new edge to connect guard block to the merge/loop-exit block. */
954 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
956 new_e->count = guard_bb->count;
957 new_e->probability = probability;
958 new_e->count = apply_probability (enter_e->count, probability);
959 enter_e->count -= new_e->count;
960 enter_e->probability = inverse_probability (probability);
961 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
962 return new_e;
966 /* This function verifies that the following restrictions apply to LOOP:
967 (1) it is innermost
968 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
969 (3) it is single entry, single exit
970 (4) its exit condition is the last stmt in the header
971 (5) E is the entry/exit edge of LOOP.
974 bool
975 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
977 edge exit_e = single_exit (loop);
978 edge entry_e = loop_preheader_edge (loop);
979 gimple orig_cond = get_loop_exit_condition (loop);
980 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
982 if (need_ssa_update_p (cfun))
983 return false;
985 if (loop->inner
986 /* All loops have an outer scope; the only case loop->outer is NULL is for
987 the function itself. */
988 || !loop_outer (loop)
989 || loop->num_nodes != 2
990 || !empty_block_p (loop->latch)
991 || !single_exit (loop)
992 /* Verify that new loop exit condition can be trivially modified. */
993 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
994 || (e != exit_e && e != entry_e))
995 return false;
997 return true;
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by loop peeling: FIRST_LOOP's exit
   block must guard SECOND_LOOP, whose preheader must be reachable both
   from that exit block and from the block preceding FIRST_LOOP.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.
   */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
        of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
/* If the run time cost model check determines that vectorization is
   not profitable and hence scalar loop should be generated then set
   FIRST_NITERS to prologue peeled iterations. This will allow all the
   iterations to be executed in the prologue peeled scalar loop.  */

static void
set_prologue_iterations (basic_block bb_before_first_loop,
			 tree *first_niters,
			 struct loop *loop,
			 unsigned int th,
			 int probability)
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gimple newphi;
  edge e_true, e_false, e_fallthru;
  gimple cond_stmt;
  gimple_seq stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  /* Split the predecessor edge of BB_BEFORE_FIRST_LOOP twice: COND_BB
     will hold the run-time cost-model test, THEN_BB the recomputed
     iteration count.  */
  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge(e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge(e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  /* Give COND_BB a false edge that bypasses THEN_BB.  */
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
				   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  /* The edge left by the second split was a fallthru; retag it as the
     true edge of the new condition.  */
  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  /* Distribute COND_BB's profile across the two outgoing edges
     according to PROBABILITY.  */
  e_true->probability = probability;
  e_false->probability = inverse_probability (probability);
  e_true->count = apply_probability (cond_bb->count, probability);
  e_false->count = cond_bb->count - e_true->count;
  then_bb->frequency = EDGE_FREQUENCY (e_true);
  then_bb->count = e_true->count;

  e_fallthru = EDGE_SUCC (then_bb, 0);
  e_fallthru->count = then_bb->count;

  /* Build and insert the cost check: scalar_loop_iters <= TH.  */
  gsi = gsi_last_bb (cond_bb);
  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
		 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
					   NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* In THEN_BB materialize the full (unchanged) scalar iteration
     count.  */
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
			"prologue_after_cost_adjust");
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  /* Merge the two values with a PHI: the adjusted count if the cost
     check fired, the caller's *FIRST_NITERS otherwise, and hand the
     PHI result back through FIRST_NITERS.  */
  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
	       UNKNOWN_LOCATION);
  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);

  *first_niters = PHI_RESULT (newphi);
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP. After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
   If the cost model indicates that it is profitable to emit a scalar
   loop instead of the vector one, then the prolog (epilog) loop will iterate
   for the entire unchanged scalar iterations of the loop.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP. In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP. In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).
   - TH: cost model profitability threshold of iterations for vectorization.
   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
                          during versioning and hence needs to occur during
                          prologue generation or whether cost model check
                          has not occurred during prologue generation and hence
                          needs to occur during epilogue generation.
   - BOUND1 is the upper bound on number of iterations of the first loop (if known)
   - BOUND2 is the upper bound on number of iterations of the second loop (if known)

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
   then the generated condition is combined with COND_EXPR and the
   statements in COND_EXPR_STMT_LIST are emitted together with it.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.  */

static struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop,
			       edge e, tree *first_niters,
			       tree niters, bool update_first_loop_count,
			       unsigned int th, bool check_profitability,
			       tree cond_expr, gimple_seq cond_expr_stmt_list,
			       int bound1, int bound2)
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  gimple_stmt_iterator gsi;
  edge exit_e = single_exit (loop);
  LOC loop_loc;
  tree cost_pre_condition = NULL_TREE;
  /* There are many aspects to how likely the first loop is going to be executed.
     Without histogram we can't really do good job.  Simply set it to
     2/3, so the first loop is not reordered to the end of function and
     the hot path through stays short.  */
  int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int probability_of_second_loop;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
     in the exit bb and rename all the uses after the loop.  This simplifies
     the *guard[12] routines, which assume loop closed SSA form for all PHIs
     (but normally loop closed SSA form doesn't require virtual PHIs to be
     in the same form).  Doing this early simplifies the checking what
     uses should be renamed.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	gimple phi = gsi_stmt (gsi);
	/* Look for an existing virtual PHI in the exit block.  */
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	    break;
	if (gsi_end_p (gsi))
	    /* None found: materialize one and redirect all uses of the
	       latch VOP that are outside the loop header to it.  */
	    tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
	    gimple new_phi = create_phi_node (new_vop, exit_e->dest);
	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	    imm_use_iterator imm_iter;
	    gimple stmt;
	    use_operand_p use_p;

	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	    gimple_phi_set_result (new_phi, new_vop);
	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	      if (stmt != new_phi && gimple_bb (stmt) != loop->header)
		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		  SET_USE (use_p, new_vop);
	break;

  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:  */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
      loop_loc = find_loop_location (loop);
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                       "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;

  if (MAY_HAVE_DEBUG_STMTS)
      gcc_assert (!adjust_vec.exists ());
      vec_stack_alloc (adjust_info, adjust_vec, 32);

  if (e == exit_e)
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
  else
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;

  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);

  /* 2.  Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
         generation and when the cost model check can be done at compile time.

         Resulting CFG would be:

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
         this function is invoked for prologue generation
         and the cost model check needs to be done at run
         time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
         this function is invoked for epilogue generation
         and the cost model check needs to be done at run
         time. This check is combined with any pre-existing
         check in COND_EXPR to avoid versioning.

         Resulting CFG after prologue peeling would be:

         bb_before_first_loop:
         if ((scalar_loop_iterations <= th)
             ||
             FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:  */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  bb_before_second_loop = split_edge (single_exit (first_loop));

  probability_of_second_loop = (inverse_probability (first_guard_probability)
			        + combine_probabilities (second_guard_probability,
                                                         first_guard_probability));
  /* Theoretically preheader edge of first loop and exit edge should have
     same frequencies.  Loop exit probablities are however easy to get wrong.
     It is safer to copy value from original loop entry.  */
  bb_before_second_loop->frequency
     = apply_probability (bb_before_first_loop->frequency,
			  probability_of_second_loop);
  bb_before_second_loop->count
     = apply_probability (bb_before_first_loop->count,
			  probability_of_second_loop);
  single_succ_edge (bb_before_second_loop)->count
     = bb_before_second_loop->count;

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
		     build_int_cst (TREE_TYPE (*first_niters), 0));
      if (check_profitability)
	  tree scalar_loop_iters
	    = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
			     (loop_vec_info_for_loop (loop)));
	  cost_pre_condition =
	    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
			 build_int_cst (TREE_TYPE (scalar_loop_iters), th));

	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
				       cost_pre_condition, pre_condition);
      if (cond_expr)
	  /* Combine with the caller's versioning condition so only one
	     guard is emitted.  */
	  pre_condition =
	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
			 pre_condition,
			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
				      cond_expr));

  /* Prologue peeling.  */
  else
      if (check_profitability)
	set_prologue_iterations (bb_before_first_loop, first_niters,
				 loop, th, first_guard_probability);

      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
		     build_int_cst (TREE_TYPE (*first_niters), 0));

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
				  cond_expr_stmt_list,
				  bb_before_second_loop, bb_before_first_loop,
				  inverse_probability (first_guard_probability));
  scale_loop_profile (first_loop, first_guard_probability,
		      check_profitability && (int)th > bound1 ? th : bound1);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
				      first_loop == new_loop,
				      &new_exit_bb);

  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:  */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  pre_condition =
	fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop, bb_before_first_loop,
				  inverse_probability (second_guard_probability));
  scale_loop_profile (second_loop, probability_of_second_loop, bound2);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                      second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);

  delete_update_ssa ();

  adjust_vec_debug_stmts ();

  return new_loop;
1476 /* Function vect_get_loop_location.
1478 Extract the location of the loop in the source code.
1479 If the loop is not well formed for vectorization, an estimated
1480 location is calculated.
1481 Return the loop location if succeed and NULL if not. */
1484 find_loop_location (struct loop *loop)
1486 gimple stmt = NULL;
1487 basic_block bb;
1488 gimple_stmt_iterator si;
1490 if (!loop)
1491 return UNKNOWN_LOC;
1493 stmt = get_loop_exit_condition (loop);
1495 if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1496 return gimple_location (stmt);
1498 /* If we got here the loop is probably not "well formed",
1499 try to estimate the loop location */
1501 if (!loop->header)
1502 return UNKNOWN_LOC;
1504 bb = loop->header;
1506 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1508 stmt = gsi_stmt (si);
1509 if (gimple_location (stmt) != UNKNOWN_LOC)
1510 return gimple_location (stmt);
1513 return UNKNOWN_LOC;
1517 /* This function builds ni_name = number of iterations loop executes
1518 on the loop preheader. If SEQ is given the stmt is instead emitted
1519 there. */
1521 static tree
1522 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1524 tree ni_name, var;
1525 gimple_seq stmts = NULL;
1526 edge pe;
1527 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1528 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1530 var = create_tmp_var (TREE_TYPE (ni), "niters");
1531 ni_name = force_gimple_operand (ni, &stmts, false, var);
1533 pe = loop_preheader_edge (loop);
1534 if (stmts)
1536 if (seq)
1537 gimple_seq_add_seq (&seq, stmts);
1538 else
1540 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1541 gcc_assert (!new_bb);
1545 return ni_name;
1549 /* This function generates the following statements:
1551 ni_name = number of iterations loop executes
1552 ratio = ni_name / vf
1553 ratio_mult_vf_name = ratio * vf
1555 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1556 if that is non-NULL. */
1558 static void
1559 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1560 tree *ni_name_ptr,
1561 tree *ratio_mult_vf_name_ptr,
1562 tree *ratio_name_ptr,
1563 gimple_seq cond_expr_stmt_list)
1566 edge pe;
1567 basic_block new_bb;
1568 gimple_seq stmts;
1569 tree ni_name, ni_minus_gap_name;
1570 tree var;
1571 tree ratio_name;
1572 tree ratio_mult_vf_name;
1573 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1574 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1575 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1576 tree log_vf;
1578 pe = loop_preheader_edge (loop);
1580 /* Generate temporary variable that contains
1581 number of iterations loop executes. */
1583 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1584 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1586 /* If epilogue loop is required because of data accesses with gaps, we
1587 subtract one iteration from the total number of iterations here for
1588 correct calculation of RATIO. */
1589 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1591 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1592 ni_name,
1593 build_one_cst (TREE_TYPE (ni_name)));
1594 if (!is_gimple_val (ni_minus_gap_name))
1596 var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1598 stmts = NULL;
1599 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1600 true, var);
1601 if (cond_expr_stmt_list)
1602 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1603 else
1605 pe = loop_preheader_edge (loop);
1606 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1607 gcc_assert (!new_bb);
1611 else
1612 ni_minus_gap_name = ni_name;
1614 /* Create: ratio = ni >> log2(vf) */
1616 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1617 ni_minus_gap_name, log_vf);
1618 if (!is_gimple_val (ratio_name))
1620 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1622 stmts = NULL;
1623 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1624 if (cond_expr_stmt_list)
1625 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1626 else
1628 pe = loop_preheader_edge (loop);
1629 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1630 gcc_assert (!new_bb);
1634 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1636 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1637 ratio_name, log_vf);
1638 if (!is_gimple_val (ratio_mult_vf_name))
1640 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1642 stmts = NULL;
1643 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1644 true, var);
1645 if (cond_expr_stmt_list)
1646 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1647 else
1649 pe = loop_preheader_edge (loop);
1650 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1651 gcc_assert (!new_bb);
1655 *ni_name_ptr = ni_name;
1656 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1657 *ratio_name_ptr = ratio_name;
1659 return;
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gimple phi;
  gimple_stmt_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      tree access_fn = NULL;
      tree evolution_part;

      phi = gsi_stmt (gsi);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (virtual_operand_p (PHI_RESULT (phi)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "virtual phi. skip.");
	  continue;

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "reduc phi. skip.");
	  continue;

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "No Access function.");
	  return false;

      STRIP_NOPS (access_fn);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Access function of PHI: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);

      /* An IV with no evolution in this loop cannot be advanced.  */
      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution.");
	  return false;

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	return false;

  return true;
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gimple phi, phi1;
  gimple_stmt_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header PHIs and the update_bb PHIs in lockstep; they are
     assumed to be in the same order (Assumption 2 above).  */
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;
      stmt_vec_info stmt_info;

      phi = gsi_stmt (gsi);
      phi1 = gsi_stmt (gsi1);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);

      /* Skip virtual phi's.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "virtual phi. skip.");
	  continue;

      /* Skip reduction phis.  */
      stmt_info = vinfo_for_stmt (phi);
      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "reduc phi. skip.");
	  continue;

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      /* Compute ni = init + niters * step, using pointer arithmetic for
	 pointer-typed IVs.  */
      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      /* The advanced IV value is materialized in the loop's exit block.  */
      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
					  true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1882 /* Function vect_do_peeling_for_loop_bound
1884 Peel the last iterations of the loop represented by LOOP_VINFO.
1885 The peeled iterations form a new epilog loop. Given that the loop now
1886 iterates NITERS times, the new epilog loop iterates
1887 NITERS % VECTORIZATION_FACTOR times.
1889 The original loop will later be made to iterate
1890 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1892 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1893 test. */
1895 void
1896 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1897 unsigned int th, bool check_profitability)
1899 tree ni_name, ratio_mult_vf_name;
1900 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1901 struct loop *new_loop;
1902 edge update_e;
1903 basic_block preheader;
1904 int loop_num;
1905 int max_iter;
1906 tree cond_expr = NULL_TREE;
1907 gimple_seq cond_expr_stmt_list = NULL;
1909 if (dump_enabled_p ())
1910 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1911 "=== vect_do_peeling_for_loop_bound ===");
1913 initialize_original_copy_tables ();
1915 /* Generate the following variables on the preheader of original loop:
1917 ni_name = number of iteration the original loop executes
1918 ratio = ni_name / vf
1919 ratio_mult_vf_name = ratio * vf */
1920 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1921 &ratio_mult_vf_name, ratio,
1922 cond_expr_stmt_list);
1924 loop_num = loop->num;
1926 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1927 &ratio_mult_vf_name, ni_name, false,
1928 th, check_profitability,
1929 cond_expr, cond_expr_stmt_list,
1930 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1931 gcc_assert (new_loop);
1932 gcc_assert (loop_num == loop->num);
1933 #ifdef ENABLE_CHECKING
1934 slpeel_verify_cfg_after_peeling (loop, new_loop);
1935 #endif
1937 /* A guard that controls whether the new_loop is to be executed or skipped
1938 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1939 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1940 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1941 is on the path where the LOOP IVs are used and need to be updated. */
1943 preheader = loop_preheader_edge (new_loop)->src;
1944 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1945 update_e = EDGE_PRED (preheader, 0);
1946 else
1947 update_e = EDGE_PRED (preheader, 1);
1949 /* Update IVs of original loop as if they were advanced
1950 by ratio_mult_vf_name steps. */
1951 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1953 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1954 and this means N-2 loopback edge executions.
1956 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1957 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1958 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1959 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1960 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1961 if (check_profitability)
1962 max_iter = MAX (max_iter, (int) th - 1);
1963 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1964 dump_printf (MSG_OPTIMIZED_LOCATIONS,
1965 "Setting upper bound of nb iterations for epilogue "
1966 "loop to %d\n", max_iter);
1968 /* After peeling we have to reset scalar evolution analyzer. */
1969 scev_reset ();
1971 free_original_copy_tables ();
1975 /* Function vect_gen_niters_for_prolog_loop
1977 Set the number of iterations for the loop represented by LOOP_VINFO
1978 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1979 and the misalignment of DR - the data reference recorded in
1980 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1981 this loop, the data reference DR will refer to an aligned location.
1983 The following computation is generated:
1985 If the misalignment of DR is known at compile time:
1986 addr_mis = int mis = DR_MISALIGNMENT (dr);
1987 Else, compute address misalignment in bytes:
1988 addr_mis = addr & (vectype_align - 1)
1990 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1992 (elem_size = element type size; an element is the scalar element whose type
1993 is the inner type of the vectype)
1995 When the step of the data-ref in the loop is not 1 (as in interleaved data
1996 and SLP), the number of iterations of the prolog must be divided by the step
1997 (which is equal to the size of interleaved group).
1999 The above formulas assume that VF == number of elements in the vector. This
2000 may not hold when there are multiple-types in the loop.
2001 In this case, for some data-references in the loop the VF does not represent
2002 the number of elements that fit in the vector. Therefore, instead of VF we
2003 use TYPE_VECTOR_SUBPARTS. */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  gimple_seq stmts;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  gimple dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Required alignment of the unaligned data reference, in bytes.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* The number of iterations to peel was fixed at analysis time;
	 just emit it as a constant.  */
      int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "known peeling = %d.", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      /* Misalignment is unknown at compile time: compute it at run time
	 from the data reference's start address.  */
      gimple_seq new_stmts = NULL;
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      /* For a negative step the first vector starts TYPE_VECTOR_SUBPARTS - 1
	 elements before the recorded base address.  */
      tree offset = negative
	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
						&new_stmts, offset, loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* The address computation may need statements of its own; they must
	 dominate the loop, so insert them on the preheader edge.  */
      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
	fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
		     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size
	 (element size is a power of two, so divide by shifting).  */
      elem_misalign =
	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements;
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, iters);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  stmts = NULL;
  iters_name = force_gimple_operand (iters, &stmts, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmts)
    {
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
2104 /* Function vect_update_init_of_dr
2106 NITERS iterations were peeled from LOOP. DR represents a data reference
2107 in LOOP. This function updates the information recorded in DR to
2108 account for the fact that the first NITERS iterations had already been
2109 executed. Specifically, it updates the OFFSET field of DR. */
2111 static void
2112 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2114 tree offset = DR_OFFSET (dr);
2116 niters = fold_build2 (MULT_EXPR, sizetype,
2117 fold_convert (sizetype, niters),
2118 fold_convert (sizetype, DR_STEP (dr)));
2119 offset = fold_build2 (PLUS_EXPR, sizetype,
2120 fold_convert (sizetype, offset), niters);
2121 DR_OFFSET (dr) = offset;
2125 /* Function vect_update_inits_of_drs
2127 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2128 This function updates the information recorded for the data references in
2129 the loop to account for the fact that the first NITERS iterations had
2130 already been executed. Specifically, it updates the initial_condition of
2131 the access_function of all the data_references in the loop. */
2133 static void
2134 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2136 unsigned int i;
2137 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2138 struct data_reference *dr;
2140 if (dump_enabled_p ())
2141 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2142 "=== vect_update_inits_of_dr ===");
2144 FOR_EACH_VEC_ELT (datarefs, i, dr)
2145 vect_update_init_of_dr (dr, niters);
2149 /* Function vect_do_peeling_for_alignment
2151 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2152 'niters' is set to the misalignment of one of the data references in the
2153 loop, thereby forcing it to refer to an aligned location at the beginning
2154 of the execution of this loop. The data reference for which we are
2155 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
			       unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  tree wide_prolog_niters;
  struct loop *new_loop;
  int max_iter;
  int bound = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
							   ni_name,
							   &bound);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
				   &niters_of_prolog_loop, ni_name, true,
				   th, check_profitability, NULL_TREE, NULL,
				   bound,
				   0);

  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif
  /* For vectorization factor N, we need to copy at most N-1 values
     for alignment and this means N-2 loopback edge executions.  */
  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
  if (check_profitability)
    max_iter = MAX (max_iter, (int) th - 1);
  record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
  dump_printf (MSG_OPTIMIZED_LOCATIONS,
               "Setting upper bound of nb iterations for prologue "
               "loop to %d\n", max_iter);

  /* Update number of times loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
	TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
    wide_prolog_niters = niters_of_prolog_loop;
  else
    {
      /* vect_update_inits_of_drs does sizetype arithmetic with the peel
	 count, so widen it to sizetype first.  */
      gimple_seq seq = NULL;
      edge pe = loop_preheader_edge (loop);
      tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
      wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
                                                 var);
      if (seq)
	{
	  /* Insert stmt on loop preheader edge.  */
	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}
    }

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2235 /* Function vect_create_cond_for_align_checks.
2237 Create a conditional expression that represents the alignment checks for
2238 all of data references (array element references) whose alignment must be
2239 checked at runtime.
2241 Input:
2242 COND_EXPR - input conditional expression. New conditions will be chained
2243 with logical AND operation.
2244 LOOP_VINFO - two fields of the loop information are used.
2245 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2246 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2248 Output:
2249 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2250 expression.
2251 The returned value is the conditional expression to be used in the if
2252 statement that controls which version of the loop gets executed at runtime.
2254 The algorithm makes two assumptions:
2255 1) The number of bytes "n" in a vector is a power of 2.
2256 2) An address "a" is aligned if a%n is zero and that this
2257 test can be done as a&(n-1) == 0. For example, for 16
2258 byte vectors the test is a&0xf == 0. */
2260 static void
2261 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2262 tree *cond_expr,
2263 gimple_seq *cond_expr_stmt_list)
2265 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2266 vec<gimple> may_misalign_stmts
2267 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2268 gimple ref_stmt;
2269 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2270 tree mask_cst;
2271 unsigned int i;
2272 tree int_ptrsize_type;
2273 char tmp_name[20];
2274 tree or_tmp_name = NULL_TREE;
2275 tree and_tmp_name;
2276 gimple and_stmt;
2277 tree ptrsize_zero;
2278 tree part_cond_expr;
2280 /* Check that mask is one less than a power of 2, i.e., mask is
2281 all zeros followed by all ones. */
2282 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2284 int_ptrsize_type = signed_type_for (ptr_type_node);
2286 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2287 of the first vector of the i'th data reference. */
2289 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2291 gimple_seq new_stmt_list = NULL;
2292 tree addr_base;
2293 tree addr_tmp_name;
2294 tree new_or_tmp_name;
2295 gimple addr_stmt, or_stmt;
2296 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2297 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2298 bool negative = tree_int_cst_compare
2299 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2300 tree offset = negative
2301 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2303 /* create: addr_tmp = (int)(address_of_first_vector) */
2304 addr_base =
2305 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2306 offset, loop);
2307 if (new_stmt_list != NULL)
2308 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2310 sprintf (tmp_name, "addr2int%d", i);
2311 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2312 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2313 addr_base, NULL_TREE);
2314 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2316 /* The addresses are OR together. */
2318 if (or_tmp_name != NULL_TREE)
2320 /* create: or_tmp = or_tmp | addr_tmp */
2321 sprintf (tmp_name, "orptrs%d", i);
2322 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2323 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2324 new_or_tmp_name,
2325 or_tmp_name, addr_tmp_name);
2326 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2327 or_tmp_name = new_or_tmp_name;
2329 else
2330 or_tmp_name = addr_tmp_name;
2332 } /* end for i */
2334 mask_cst = build_int_cst (int_ptrsize_type, mask);
2336 /* create: and_tmp = or_tmp & mask */
2337 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2339 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2340 or_tmp_name, mask_cst);
2341 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2343 /* Make and_tmp the left operand of the conditional test against zero.
2344 if and_tmp has a nonzero bit then some address is unaligned. */
2345 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2346 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2347 and_tmp_name, ptrsize_zero);
2348 if (*cond_expr)
2349 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2350 *cond_expr, part_cond_expr);
2351 else
2352 *cond_expr = part_cond_expr;
2356 /* Function vect_vfa_segment_size.
2358 Create an expression that computes the size of segment
2359 that will be accessed for a data reference. The functions takes into
2360 account that realignment loads may access one more vector.
2362 Input:
2363 DR: The data reference.
2364 LENGTH_FACTOR: segment length to consider.
2366 Return an expression whose value is the size of segment which will be
2367 accessed by DR. */
2369 static tree
2370 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2372 tree segment_length;
2374 if (integer_zerop (DR_STEP (dr)))
2375 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2376 else
2377 segment_length = size_binop (MULT_EXPR,
2378 fold_convert (sizetype, DR_STEP (dr)),
2379 fold_convert (sizetype, length_factor));
2381 if (vect_supportable_dr_alignment (dr, false)
2382 == dr_explicit_realign_optimized)
2384 tree vector_size = TYPE_SIZE_UNIT
2385 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2387 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2389 return segment_length;
2393 /* Function vect_create_cond_for_alias_checks.
2395 Create a conditional expression that represents the run-time checks for
2396 overlapping of address ranges represented by a list of data references
2397 relations passed as input.
2399 Input:
2400 COND_EXPR - input conditional expression. New conditions will be chained
2401 with logical AND operation.
2402 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2403 to be checked.
2405 Output:
2406 COND_EXPR - conditional expression.
2407 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2408 expression.
2411 The returned value is the conditional expression to be used in the if
2412 statement that controls which version of the loop gets executed at runtime.
static void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
				   tree * cond_expr,
				   gimple_seq * cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  vec<ddr_p> may_alias_ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  ddr_p ddr;
  unsigned int i;
  tree part_cond_expr, length_factor;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
     &&
     ...
     &&
     ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
     || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */

  if (may_alias_ddrs.is_empty ())
    return;

  FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    {
      struct data_reference *dr_a, *dr_b;
      gimple dr_group_first_a, dr_group_first_b;
      tree addr_base_a, addr_base_b;
      tree segment_length_a, segment_length_b;
      gimple stmt_a, stmt_b;
      tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;

      /* For an interleaved group check the whole group, starting from its
	 first element.  */
      dr_a = DDR_A (ddr);
      stmt_a = DR_STMT (DDR_A (ddr));
      dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
      if (dr_group_first_a)
	{
	  stmt_a = dr_group_first_a;
	  dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
	}

      dr_b = DDR_B (ddr);
      stmt_b = DR_STMT (DDR_B (ddr));
      dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
      if (dr_group_first_b)
	{
	  stmt_b = dr_group_first_b;
	  dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
	}

      /* Statements computing the two base addresses accumulate into
	 COND_EXPR_STMT_LIST for the caller to emit.  */
      addr_base_a =
	vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
					      NULL_TREE, loop);
      addr_base_b =
	vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
					      NULL_TREE, loop);

      /* When the steps differ the vectorization factor alone does not bound
	 the accessed range; fall back to the full scalar iteration count.  */
      if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
	length_factor = scalar_loop_iters;
      else
	length_factor = size_int (vect_factor);
      segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
      segment_length_b = vect_vfa_segment_size (dr_b, length_factor);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "create runtime check for data references ");
	  dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_a));
	  dump_printf (MSG_OPTIMIZED_LOCATIONS, " and ");
	  dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_b));
	}

      /* A negative step means the segment extends below the base address;
	 swap min/max so [min, max] is a proper interval.  */
      seg_a_min = addr_base_a;
      seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
      if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
	seg_a_min = seg_a_max, seg_a_max = addr_base_a;

      seg_b_min = addr_base_b;
      seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
      if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
	seg_b_min = seg_b_max, seg_b_max = addr_base_b;

      /* The two references are independent iff one segment ends before the
	 other begins.  */
      part_cond_expr =
      	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	  fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
	  fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));

      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
		     "created %u versioning for alias checks.\n",
		     may_alias_ddrs.length ());
}
2521 /* Function vect_loop_versioning.
2523 If the loop has data references that may or may not be aligned or/and
2524 has data reference relations whose independence was not proven then
2525 two versions of the loop need to be generated, one which is vectorized
2526 and one which isn't. A test is then generated to control which of the
2527 loops is executed. The test checks for the alignment of all of the
2528 data references that may or may not be aligned. An additional
2529 sequence of runtime tests is generated for each pairs of DDRs whose
2530 independence was not proven. The vectorized version of loop is
2531 executed only if both alias and alignment tests are passed.
2533 The test generated to check which version of loop is executed
2534 is modified to also check for profitability as indicated by the
2535 cost model initially.
2537 The versioning precondition(s) are placed in *COND_EXPR and
2538 *COND_EXPR_STMT_LIST. */
void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block condition_bb;
  gimple_stmt_iterator gsi, cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gimple orig_phi, new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  /* Predict the vectorized version to be taken 4 out of 5 times.  */
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  if (check_profitability)
    {
      /* Guard the vectorized loop with niters > TH, the cost-model
	 profitability threshold.  */
      cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
			       build_int_cst (TREE_TYPE (scalar_loop_iters), th));
      cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					  is_gimple_condexpr, NULL_TREE);
    }

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  loop_version (loop, cond_expr, &condition_bb,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  free_original_copy_tables();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors). Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.  */

  merge_bb = single_exit (loop)->dest;
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
  new_exit_bb = split_edge (single_exit (loop));
  new_exit_e = single_exit (loop);
  e = EDGE_SUCC (new_exit_bb, 0);

  /* Re-create the loop-closed-SSA exit phis in the new exit block and make
     the old merge-block phis take their results instead.  */
  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree new_res;
      orig_phi = gsi_stmt (gsi);
      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
      new_phi = create_phi_node (new_res, new_exit_bb);
      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      add_phi_arg (new_phi, arg, new_exit_e,
		   gimple_phi_arg_location_from_edge (orig_phi, e));
      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
    }

  /* End loop-exit-fixes after versioning.  */

  update_ssa (TODO_update_ssa);
  if (cond_expr_stmt_list)
    {
      /* Emit the statements that compute the versioning condition into the
	 block ending with the version-selecting COND_EXPR.  */
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
}