/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "diagnostic-core.h"
#include "params.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  */

static void
rename_variables_in_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
        continue;
      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
    }
}
/* Renames variables in new generated LOOP.  */

void
rename_variables_in_loop (struct loop *loop)
{
  unsigned i;
  basic_block *bbs;

  bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes; i++)
    rename_variables_in_bb (bbs[i]);

  free (bbs);
}
typedef struct
{
  tree from, to;
  basic_block bb;
} adjust_info;

DEF_VEC_O(adjust_info);
DEF_VEC_ALLOC_O_STACK(adjust_info);
#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static VEC(adjust_info, stack) *adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec);

  while (!VEC_empty (adjust_info, adjust_vec))
    {
      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
      VEC_pop (adjust_info, adjust_vec);
    }

  VEC_free (adjust_info, stack, adjust_vec);
  adjust_vec = NULL;
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
      && SSA_NAME_VAR (from) != gimple_vop (cfun))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec)
        VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}
/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
                                       struct loop *new_loop, bool after)
{
  tree new_ssa_name;
  gimple phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = single_exit (new_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
  gimple_stmt_iterator gsi_new, gsi_orig;

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
             (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
             (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (gsi_new = gsi_start_phis (new_loop->header),
       gsi_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
       gsi_next (&gsi_new), gsi_next (&gsi_orig))
    {
      source_location locus;
      phi_new = gsi_stmt (gsi_new);
      phi_orig = gsi_stmt (gsi_orig);

      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* step 2.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
        continue;

      new_ssa_name = get_current_def (def);
      if (!new_ssa_name)
        {
          /* This only happens if there are no definitions
             inside the loop. use the phi_result in this case.  */
          new_ssa_name = PHI_RESULT (phi_new);
        }

      /* An ordinary ssa name defined in the loop.  */
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);

      /* Drop any debug references outside the loop, if they would
         become ill-formed SSA.  */
      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);

      /* step 3 (case 1).  */
      if (!after)
        {
          gcc_assert (new_loop_exit_e == orig_entry_e);
          adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
        }
    }
}
/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because before the guard code was added
        it had a single predecessor (the LOOP header), and now it became a merge
        point of two paths - the path that ends with the LOOP exit-edge, and
        the path that ends with GUARD_EDGE.
   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.

   ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          loop_body
          if (exit_loop) goto update_bb
          else           goto LOOP_header_bb
        update_bb:

   ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_merge_bb
          else                     goto LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   ==> The CFG after this function:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_exit_bb
          else                     goto LOOP_header_bb
        new_exit_bb:
        new_merge_bb:
          goto update_bb
        update_bb:

   This function:
   1. creates and updates the relevant phi nodes to account for the new
      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
      1.1. Create phi nodes at NEW_MERGE_BB.
      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
           was added.
   2. preserves loop-closed-ssa-form by creating the required phi nodes
      at the exit of LOOP (i.e, in NEW_EXIT_BB).

   There are two flavors to this function:

   slpeel_update_phi_nodes_for_guard1:
     Here the guard controls whether we enter or skip LOOP, where LOOP is a
     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop header.

   slpeel_update_phi_nodes_for_guard2:
     Here the guard controls whether we enter or skip LOOP, where LOOP is an
     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop exit.

   I.E., the overall structure is:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
   that have phis in loop1->header).

   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
   that have phis in next_bb). It also adds some of these phis to
   loop1_exit_bb.

   slpeel_update_phi_nodes_for_guard1 is always called before
   slpeel_update_phi_nodes_for_guard2. They are both needed in order
   to create correct data-flow and loop-closed-ssa-form.

   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
   that change between iterations of a loop (and therefore have a phi-node
   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
   phis for variables that are used out of the loop (and therefore have
   loop-closed exit phis). Some variables may be both updated between
   iterations and used after the loop. This is why in loop1_exit_bb we
   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
   and exit phis (created by slpeel_update_phi_nodes_for_guard2).

   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
     an original loop. i.e., we have:

           orig_loop
           guard_bb (goto LOOP/new_merge)
           new_loop <-- LOOP
           new_exit
           new_merge
           next_bb

     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
     have:

           new_loop
           guard_bb (goto LOOP/new_merge)
           orig_loop <-- LOOP
           new_exit
           new_merge
           next_bb

     The SSA names defined in the original loop have a current
     reaching definition that records the corresponding new
     ssa-name used in the new duplicated loop copy.
  */
/* Function slpeel_update_phi_nodes_for_guard1

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
            information.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name updated between loop iterations (i.e - for each name that has
   an entry (loop-header) phi in LOOP) we create a new phi in:
   1. merge1_bb (to account for the edge from guard1)
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
*/

static void
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb,
                                    bitmap *defs)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = loop->header;
  edge new_exit_e;
  tree current_new_name;
  gimple_stmt_iterator gsi_orig, gsi_update;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi_orig = gsi_start_phis (orig_bb),
       gsi_update = gsi_start_phis (update_bb);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      source_location loop_locus, guard_locus;
      orig_phi = gsi_stmt (gsi_orig);
      update_phi = gsi_stmt (gsi_update);

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
                                                      EDGE_SUCC (loop->latch,
                                                                 0));
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
      guard_locus
        = gimple_phi_arg_location_from_edge (orig_phi,
                                             loop_preheader_edge (loop));

      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;

      /** 2. Handle loop-closed-ssa-form phis  **/

      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
        continue;

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
                                  PHI_RESULT (new_phi));

      /* 2.4. Record the newly created name with set_current_def.
         We want to find a name such that
                name = get_current_def (orig_loop_name)
         and to set its current definition as follows:
                set_current_def (name, new_phi_name)

         If LOOP is a new loop then loop_arg is already the name we're
         looking for. If LOOP is the original loop, then loop_arg is
         the orig_loop_name and the relevant name is recorded in its
         current reaching definition.  */
      if (is_new_loop)
        current_new_name = loop_arg;
      else
        {
          current_new_name = get_current_def (loop_arg);
          /* current_def is not available only if the variable does not
             change inside the loop, in which case we also don't care
             about recording a current_def for it because we won't be
             trying to create loop-exit-phis for it.  */
          if (!current_new_name)
            continue;
        }
      gcc_assert (get_current_def (current_new_name) == NULL_TREE);

      set_current_def (current_new_name, PHI_RESULT (new_phi));
      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
    }
}
/* Function slpeel_update_phi_nodes_for_guard2

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name used outside the loop (i.e - for each name that has an exit
   phi in next_bb) we create a new phi in:
   1. merge2_bb (to account for the edge from guard_bb)
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
      if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
*/

static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gimple_stmt_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      update_phi = gsi_stmt (gsi);
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
         out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
        continue;
      orig_def_new_name = get_current_def (orig_def);

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
        {
          new_name = orig_def_new_name;
          /* Some variables have both loop-entry-phis and loop-exit-phis.
             Such variables were given yet newer names by phis placed in
             guard_bb by slpeel_update_phi_nodes_for_guard1.  I.e:
             new_name2 = get_current_def (get_current_def (orig_name)).  */
          new_name2 = get_current_def (new_name);
        }

      if (is_new_loop)
        {
          guard_arg = orig_def;
          loop_arg = new_name;
        }
      else
        {
          guard_arg = new_name;
          loop_arg = orig_def;
        }
      if (new_name2)
        guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;

      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
                                  PHI_RESULT (new_phi));

      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
         GUARD_BB, i.e. names for which
         slpeel_update_phi_nodes_for_guard1 had not already created a
         phi node. This is the case for names that are used outside
         the loop (and therefore need an exit phi) but are not updated
         across loop iterations (and therefore don't have a
         loop-header-phi).

         slpeel_update_phi_nodes_for_guard1 is responsible for
         creating loop-exit phis in GUARD_BB for names that have a
         loop-header-phi.  When such a phi is created we also record
         the new name in its current definition.  If this new name
         exists, then guard_arg was set to this new name (see 1.2
         above).  Therefore, if guard_arg is not this new name, this
         is an indication that an exit-phi in GUARD_BB was not yet
         created, so we take care of it here.  */
      if (guard_arg == new_name2)
        continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
                   UNKNOWN_LOCATION);

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
                  == guard_arg);
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
                                  PHI_RESULT (new_phi));
    }
}
/* Make the LOOP iterate NITERS times. This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
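/* Illustrative sketch only (made-up SSA names, not emitted verbatim by this
   function): a loop whose exit test was

       if (i_7 > n_3) goto exit_bb; else goto header_bb;

   is given a fresh counter IV starting at 0 with step 1, and the exit test is
   replaced by

       ivtmp_9 = ivtmp_8 + 1;
       if (ivtmp_9 >= NITERS) goto exit_bb; else goto header_bb;

   The comparison code is GE_EXPR or LT_EXPR depending on whether the exit
   edge is the true or the false edge of the condition.  */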
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gimple cond_stmt;
  gimple orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  LOC loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);

  loop_loc = find_loop_location (loop);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (loop_loc != UNKNOWN_LOC)
        fprintf (dump_file, "\nloop at %s:%d: ",
                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
    }

  loop->nb_iterations = niters;
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  gimple phi;
  tree phi_arg;
  edge exit, new_exit;
  gimple_stmt_iterator gsi;

  at_exit = (e == single_exit (loop));
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  bbs = get_loop_body (loop);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (loop, loop_outer (loop));
  if (!new_loop)
    {
      free (bbs);
      return NULL;
    }

  exit_dest = single_exit (loop)->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  new_bbs = XNEWVEC (basic_block, loop->num_nodes);

  exit = single_exit (loop);
  copy_bbs (bbs, loop->num_nodes, new_bbs,
            &exit, 1, &new_exit, NULL,
            e->src);

  /* Duplicating phi args at exit bbs as coming
     also from exit of duplicated loop.  */
  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
      if (phi_arg)
        {
          edge new_loop_exit_edge;
          source_location locus;

          locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
          else
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);

          add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
        }
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      redirect_edge_and_branch_force (e, new_loop->header);
      PENDING_STMT (e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
      if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
    }
  else /* Add the copy at entry.  */
    {
      edge new_exit_e;
      edge entry_e = loop_preheader_edge (loop);
      basic_block preheader = entry_e->src;

      if (!flow_bb_inside_loop_p (new_loop,
                                  EDGE_SUCC (new_loop->header, 0)->dest))
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
      else
        new_exit_e = EDGE_SUCC (new_loop->header, 1);

      redirect_edge_and_branch_force (new_exit_e, loop->header);
      PENDING_STMT (new_exit_e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               new_exit_e->src);

      /* We have to add phi args to the loop->header here as coming
         from new_exit_e edge.  */
      for (gsi = gsi_start_phis (loop->header);
           !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          phi = gsi_stmt (gsi);
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
          if (phi_arg)
            add_phi_arg (phi, phi_arg, new_exit_e,
                         gimple_phi_arg_location_from_edge (phi, entry_e));
        }

      redirect_edge_and_branch_force (entry_e, new_loop->header);
      PENDING_STMT (entry_e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
    }

  free (new_bbs);
  free (bbs);

  return new_loop;
}
/* Given the condition statement COND, put it as the last statement
   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
   Assumes that this is the single exit of the guarded loop.
   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       gimple_seq cond_expr_stmt_list,
                       basic_block exit_bb, basic_block dom_bb)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
  if (gimplify_stmt_list)
    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  cond_stmt = gimple_build_cond (NE_EXPR,
                                 cond, build_int_cst (TREE_TYPE (cond), 0),
                                 NULL_TREE, NULL_TREE);
  if (cond_expr_stmt_list)
    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);

  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gimple orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);

  if (need_ssa_update_p (cfun))
    return false;

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is for
         the function itself.  */
      || !loop_outer (loop)
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
#ifdef ENABLE_CHECKING
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.
   */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
        of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
/* If the run time cost model check determines that vectorization is
   not profitable and hence scalar loop should be generated then set
   FIRST_NITERS to prologue peeled iterations. This will allow all the
   iterations to be executed in the prologue peeled scalar loop.  */

static void
set_prologue_iterations (basic_block bb_before_first_loop,
                         tree first_niters,
                         struct loop *loop,
                         unsigned int th)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gimple newphi;
  edge e_true, e_false, e_fallthru;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge(e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge(e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
                                   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  e_fallthru = EDGE_SUCC (then_bb, 0);

  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
                          true, NULL_TREE);
  cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
                                 build_int_cst (TREE_TYPE (cost_pre_condition),
                                                0), NULL_TREE, NULL_TREE);

  gsi = gsi_last_bb (cond_bb);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
                        "prologue_after_cost_adjust");
  add_referenced_var (var);
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
               UNKNOWN_LOCATION);
  add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);

  first_niters = PHI_RESULT (newphi);
}
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP.  After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
   If the cost model indicates that it is profitable to emit a scalar
   loop instead of the vector one, then the prolog (epilog) loop will iterate
   for the entire unchanged scalar iterations of the loop.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP. In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP. In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).
   - TH: cost model profitability threshold of iterations for vectorization.
   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
                          during versioning and hence needs to occur during
                          prologue generation or whether cost model check
                          has not occurred during prologue generation and hence
                          needs to occur during epilogue generation.

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
   then the generated condition is combined with COND_EXPR and the
   statements in COND_EXPR_STMT_LIST are emitted together with it.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/
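/* Purely as an illustration (not part of the original sources): in scalar C
   terms the structure produced by prologue peeling looks roughly like

       if (first_niters != 0)
         for (i = 0; i < first_niters; i++)    /* first-loop  */
           body;
       if (first_niters != niters)
         for (; i < niters; i++)               /* second-loop  */
           body;

   The variable names above are made up for the example; the real
   transformation is performed on the CFG and the SSA form, and additionally
   keeps loop-closed SSA and debug statements consistent as described
   above.  */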
static struct loop *
slpeel_tree_peel_loop_to_edge (struct loop *loop,
                               edge e, tree first_niters,
                               tree niters, bool update_first_loop_count,
                               unsigned int th, bool check_profitability,
                               tree cond_expr, gimple_seq cond_expr_stmt_list)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  edge exit_e = single_exit (loop);
  LOC loop_loc;
  tree cost_pre_condition = NULL_TREE;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks. Then, when calling
   cfg_hooks->split_edge, the function tree_split_edge
   is actually called and, when calling cfg_hooks->duplicate_block,
   the function tree_duplicate_bb is called.  */
  gimple_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
    {
      loop_loc = find_loop_location (loop);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\n%s:%d: note: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
        }
      return NULL;
    }

  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec);
      adjust_vec = VEC_alloc (adjust_info, stack, 32);
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  definitions = ssa_names_to_replace ();
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);

  /* 2.  Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
         generation and when the cost model check can be done at compile time.

         Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:

        bb_before_second_loop:

        second_loop:

        orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
         this function is invoked for prologue generation
         and the cost model check needs to be done at run
         time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        (rest of the CFG as in 2.a above)

        bb_before_second_loop:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
         this function is invoked for epilogue generation
         and the cost model check needs to be done at run
         time.  This check is combined with any pre-existing
         check in COND_EXPR to avoid versioning.

         Resulting CFG after prologue peeling would be:

        bb_before_first_loop:
        if ((scalar_loop_iterations <= th)
            ||
            FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        (rest of the CFG as in 2.a above)

        bb_before_second_loop:
  */
  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  bb_before_second_loop = split_edge (single_exit (first_loop));

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, first_niters,
                     build_int_cst (TREE_TYPE (first_niters), 0));
      if (check_profitability)
        {
          tree scalar_loop_iters
            = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
                              (loop_vec_info_for_loop (loop)));
          cost_pre_condition =
            fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
                         build_int_cst (TREE_TYPE (scalar_loop_iters), th));

          pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                                       cost_pre_condition, pre_condition);
        }
      if (cond_expr)
        pre_condition =
          fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                       pre_condition,
                       fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
                                    cond_expr));
    }

  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
        set_prologue_iterations (bb_before_first_loop, first_niters,
                                 loop, th);

      pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, first_niters,
                     build_int_cst (TREE_TYPE (first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  cond_expr_stmt_list,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
                                      first_loop == new_loop,
                                      &new_exit_bb, &definitions);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  pre_condition =
        fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                      second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  adjust_vec_debug_stmts ();

  BITMAP_FREE (definitions);
  delete_update_ssa ();

  return new_loop;
}
/* Function vect_get_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if succeed and NULL if not.  */

LOC
find_loop_location (struct loop *loop)
{
  gimple stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOC;

  stmt = get_loop_exit_condition (loop);

  if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location */

  if (!loop->header)
    return UNKNOWN_LOC;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}
/* This function builds ni_name = number of iterations loop executes
   on the loop preheader.  If SEQ is given the stmt is instead emitted
   there.  */

static tree
vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
{
  tree ni_name, var;
  gimple_seq stmts = NULL;
  edge pe;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));

  var = create_tmp_var (TREE_TYPE (ni), "niters");
  add_referenced_var (var);
  ni_name = force_gimple_operand (ni, &stmts, false, var);

  pe = loop_preheader_edge (loop);
  if (stmts)
    {
      if (seq)
        gimple_seq_add_seq (&seq, stmts);
      else
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }
    }

  return ni_name;
}
/* This function generates the following statements:

 ni_name = number of iterations loop executes
 ratio = ni_name / vf
 ratio_mult_vf_name = ratio * vf

 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
 if that is non-NULL.  */
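/* Example with made-up numbers: for vf == 4 and ni_name == 103 the code
   below computes ratio = 103 >> 2 = 25 and ratio_mult_vf_name = 25 << 2 = 100,
   i.e. the number of iterations handled by the vectorized loop.  */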
static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                 tree *ni_name_ptr,
                                 tree *ratio_mult_vf_name_ptr,
                                 tree *ratio_name_ptr,
                                 gimple_seq cond_expr_stmt_list)
{
  edge pe;
  basic_block new_bb;
  gimple_seq stmts;
  tree ni_name;
  tree var;
  tree ratio_name;
  tree ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree log_vf;

  pe = loop_preheader_edge (loop);

  /* Generate temporary variable that contains
     number of iterations loop executes.  */

  ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));

  /* Create: ratio = ni >> log2(vf) */

  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
  if (!is_gimple_val (ratio_name))
    {
      var = create_tmp_var (TREE_TYPE (ni), "bnd");
      add_referenced_var (var);

      stmts = NULL;
      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
      if (cond_expr_stmt_list)
        gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
      else
        {
          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }
    }

  /* Create: ratio_mult_vf = ratio << log2 (vf).  */

  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
                                    ratio_name, log_vf);
  if (!is_gimple_val (ratio_mult_vf_name))
    {
      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
      add_referenced_var (var);

      stmts = NULL;
      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
                                                 true, var);
      if (cond_expr_stmt_list)
        gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
      else
        {
          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }
    }

  *ni_name_ptr = ni_name;
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
  *ratio_name_ptr = ratio_name;

  return;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gimple phi;
  gimple_stmt_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      phi = gsi_stmt (gsi);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}
/*   Function vect_update_ivs_after_vectorizer.

     "Advance" the induction variables of LOOP to the value they should take
     after the execution of LOOP.  This is currently necessary because the
     vectorizer does not handle induction variables that are used after the
     loop.  Such a situation occurs when the last iterations of LOOP are
     peeled, because:
     1. We introduced new uses after LOOP for IVs that were not originally used
        after LOOP: the IVs of LOOP are now used by an epilog loop.
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
        times, whereas the loop IVs should be bumped N times.

     Input:
     - LOOP - a loop that is going to be vectorized. The last few iterations
       of LOOP were peeled.
     - NITERS - the number of iterations that LOOP executes (before it is
       vectorized). i.e, the number of times the ivs should be bumped.
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
       coming out from LOOP on which there are uses of the LOOP ivs
       (this is the path from LOOP->exit to epilog_loop->preheader).

       The new definitions of the ivs are placed in LOOP->exit.
       The phi args associated with the edge UPDATE_E in the bb
       UPDATE_E->dest are updated accordingly.

     Assumption 1: Like the rest of the vectorizer, this function assumes
     a single loop exit that has a single predecessor.

     Assumption 2: The phi nodes in the LOOP header and in update_bb are
     organized in the same order.

     Assumption 3: The access function of the ivs is simple enough (see
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
     coming out of LOOP on which the ivs of LOOP are used (this is the path
     that leads to the epilog loop; other paths skip the epilog loop).  This
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
     needs to have its phis updated.
 */
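/* Illustrative example (names and values made up): for an IV with access
   function {init_3, +, step_5}_loop and NITERS == n_7, the new loop-closed
   value inserted in LOOP->exit is

       tmp_9 = init_3 + n_7 * step_5;

   and the phi argument on edge UPDATE_E is replaced by tmp_9.  */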
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gimple phi, phi1;
  gimple_stmt_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      phi = gsi_stmt (gsi);
      phi1 = gsi_stmt (gsi1);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      /* We can end up with an access_fn like
           (short int) {(short unsigned int) i_49, +, 1}_1
         for further analysis we need to strip the outer cast but we
         need to preserve the original type.  */
      type = TREE_TYPE (access_fn);
      STRIP_NOPS (access_fn);
      evolution_part =
         unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
                                                               loop->num));
      init_expr = fold_convert (type, init_expr);

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
        ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
                          init_expr,
                          fold_convert (sizetype, off));
      else
        ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
                          init_expr,
                          fold_convert (TREE_TYPE (init_expr), off));

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
                                          true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Return the more conservative threshold between the
   min_profitable_iters returned by the cost model and the user
   specified threshold, if provided.  */

static unsigned int
conservative_cost_threshold (loop_vec_info loop_vinfo,
                             int min_profitable_iters)
{
  unsigned int th;
  int min_scalar_loop_bound;

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);

  /* Use the cost model only if it is more conservative than user specified
     threshold.  */
  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (th && vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);

  return th;
}
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).

   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
   test.  */
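/* Example with made-up numbers: for NITERS == 103 and a vectorization factor
   of 4, the vectorized loop executes 103 / 4 = 25 iterations and the epilog
   loop executes the remaining 103 % 4 = 3 scalar iterations.  */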
void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
                                tree cond_expr, gimple_seq cond_expr_stmt_list)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;
  bool check_profitability = false;
  unsigned int th = 0;
  int min_profitable_iters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
                                   &ratio_mult_vf_name, ratio,
                                   cond_expr_stmt_list);

  loop_num = loop->num;

  /* If cost model check not done during versioning and
     peeling for alignment.  */
  if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
      && !cond_expr)
    {
      check_profitability = true;

      /* Get profitability threshold for vectorized loop.  */
      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

      th = conservative_cost_threshold (loop_vinfo,
                                        min_profitable_iters);
    }

  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
                                            ratio_mult_vf_name, ni_name, false,
                                            th, check_profitability,
                                            cond_expr, cond_expr_stmt_list);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the
   loop) and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the
   step (which is equal to the size of the interleaved group).

   The above formulas assume that VF == number of elements in the vector.
   This may not hold when there are multiple types in the loop.
   In this case, for some data-references in the loop the VF does not
   represent the number of elements that fit in the vector.  Therefore,
   instead of VF we use TYPE_VECTOR_SUBPARTS.  */
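/* Worked example (illustrative numbers only): for 16-byte vectors of 4-byte
   elements we have VF == nelements == 4 and elem_size == 4.  If the first
   access is misaligned by addr_mis == 8 bytes and the step is 1, then
     elem_misalign = 8 / 4 = 2
     prolog_niters = (4 - 2) & (4 - 1) = 2
   i.e. peeling two scalar iterations makes the access 16-byte aligned.  */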
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
                                 tree *wide_prolog_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  gimple_seq stmts;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  gimple dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known peeling = %d.", npeel);

      iters = build_int_cst (niters_type, npeel);
    }
  else
    {
      gimple_seq new_stmts = NULL;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                &new_stmts, NULL_TREE, loop);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/nelements));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                     vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
    }
  /* Create:  prolog_loop_niters = min (iters, loop_niters)  */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  stmts = NULL;
  iters_name = force_gimple_operand (iters, &stmts, false, var);
  if (types_compatible_p (sizetype, niters_type))
    *wide_prolog_niters = iters_name;
  else
    {
      gimple_seq seq = NULL;
      tree wide_iters = fold_convert (sizetype, iters);
      var = create_tmp_var (sizetype, "prolog_loop_niters");
      add_referenced_var (var);
      *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
                                                  var);
      if (seq)
        gimple_seq_add_seq (&stmts, seq);
    }

  /* Insert stmt on loop preheader edge.  */
  if (stmts)
    {
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
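/* For instance (illustrative only): if three iterations were peeled and DR
   steps through an array of 4-byte elements (DR_STEP == 4), the offset
   recorded in DR grows by 3 * 4 == 12 bytes, matching the computation
   below.  */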
static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */
static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
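/* Informally (illustration only): for a loop "for (i = 0; i < n; i++)
   a[i] = ...;" whose first access to A is misaligned, this creates a scalar
   prolog loop running prolog_niters iterations followed by the original loop
   starting at i = prolog_niters, so that the remaining loop's accesses to A
   are aligned.  prolog_niters is computed by
   vect_gen_niters_for_prolog_loop above.  */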
void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  tree wide_prolog_niters;
  struct loop *new_loop;
  unsigned int th = 0;
  int min_profitable_iters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
                                                           &wide_prolog_niters);

  /* Get profitability threshold for vectorized loop.  */
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

  th = conservative_cost_threshold (loop_vinfo,
                                    min_profitable_iters);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop times.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
                                   niters_of_prolog_loop, ni_name, true,
                                   th, true, NULL_TREE, NULL);

  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update the number of times the loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment must
   be checked at runtime.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and that this
        test can be done as a&(n-1) == 0.  For example, for 16
        byte vectors the test is a&0xf == 0.  */
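/* As an example (illustrative only): with two possibly misaligned data
   references whose base addresses are A and B, and 16-byte vectors
   (LOOP_VINFO_PTR_MASK == 0xf), the statements built below are equivalent to

     addr2int0 = (ptrsize_int) A;
     addr2int1 = (ptrsize_int) B;
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;
     <cond>    = (andmask == 0);

   i.e. the vectorized version runs only if both addresses are 16-byte
   aligned.  */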
static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
                                   gimple_seq *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  VEC(gimple,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  gimple ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name;
  gimple and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the
     address of the first vector of the i'th data reference.  */

  FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp, addr_tmp_name;
      tree or_tmp, new_or_tmp_name;
      gimple addr_stmt, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
        vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
                                              NULL_TREE, loop);
      if (new_stmt_list != NULL)
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL);
      addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
                                                addr_base, NULL_TREE);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL);
          or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
                                                  new_or_tmp_name,
                                                  or_tmp_name, addr_tmp_name);
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
          or_tmp_name = new_or_tmp_name;
        }
      else
        or_tmp_name = addr_tmp_name;
    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask");
  add_referenced_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL);

  and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
                                           or_tmp_name, mask_cst);
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
                                and_tmp_name, ptrsize_zero);
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                              *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}
/* Function vect_vfa_segment_size.

   Create an expression that computes the size of the segment that will be
   accessed for a data reference.  The function takes into account that
   realignment loads may access one more vector.

   Input:
     DR: The data reference.
     VECT_FACTOR: vectorization factor.

   Return an expression whose value is the size of the segment which will be
   accessed by DR.  */
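/* For example (illustrative numbers): with DR_STEP == 4 bytes and
   VECT_FACTOR == 4 the segment is 16 bytes; if the access uses the optimized
   explicit-realignment scheme, one extra vector (another 16 bytes in this
   example) is added, because such loads may touch the following vector.  */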
static tree
vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
{
  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
                                     DR_STEP (dr), vect_factor);

  if (vect_supportable_dr_alignment (dr, false)
        == dr_explicit_realign_optimized)
    {
      tree vector_size = TYPE_SIZE_UNIT
                          (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));

      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
                                    segment_length, vector_size);
    }

  return fold_convert (sizetype, segment_length);
}
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of the address ranges represented by a list of data reference
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_DDRS contains the list of ddrs
                to be checked.

   Output:
   COND_EXPR - conditional expression.
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.

   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */
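/* Example (illustrative only): for one pair consisting of a store through
   pointer P and a load through pointer Q, each covering a segment of
   16 bytes, the generated condition is equivalent to

     (P + 16 < Q) || (Q + 16 < P)

   i.e. the vectorized loop runs only when the two accessed ranges cannot
   overlap.  */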
static void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
                                   gimple_seq *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  VEC (ddr_p, heap) *may_alias_ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  tree vect_factor =
    build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));

  ddr_p ddr;
  unsigned int i;
  tree part_cond_expr;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
     &&
     ...
     &&
     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */

  if (VEC_empty (ddr_p, may_alias_ddrs))
    return;

  FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
    {
      struct data_reference *dr_a, *dr_b;
      gimple dr_group_first_a, dr_group_first_b;
      tree addr_base_a, addr_base_b;
      tree segment_length_a, segment_length_b;
      gimple stmt_a, stmt_b;

      dr_a = DDR_A (ddr);
      stmt_a = DR_STMT (DDR_A (ddr));
      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
      if (dr_group_first_a)
        {
          stmt_a = dr_group_first_a;
          dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
        }

      dr_b = DDR_B (ddr);
      stmt_b = DR_STMT (DDR_B (ddr));
      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
      if (dr_group_first_b)
        {
          stmt_b = dr_group_first_b;
          dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
        }

      addr_base_a =
        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
                                              NULL_TREE, loop);
      addr_base_b =
        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
                                              NULL_TREE, loop);

      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "create runtime check for data references ");
          print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
        }

      part_cond_expr =
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
          fold_build2 (LT_EXPR, boolean_type_node,
            fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
              addr_base_a,
              segment_length_a),
            addr_base_b),
          fold_build2 (LT_EXPR, boolean_type_node,
            fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
              addr_base_b,
              segment_length_b),
            addr_base_a));

      if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                  *cond_expr, part_cond_expr);
      else
        *cond_expr = part_cond_expr;
    }

  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "created %u versioning for alias checks.\n",
             VEC_length (ddr_p, may_alias_ddrs));
}
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven, then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and the alignment tests pass.

   The test generated to check which version of the loop is executed
   is modified to also check for profitability as indicated by the
   cost model initially.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true, versioning is
   also performed; otherwise only the conditions are generated.  */
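/* Sketch of the result (an illustration, not generated code), for a loop
   that needs both kinds of checks:

     if (scalar_loop_iters > th
         && <all may-misalign addresses pass the alignment test>
         && <all may-alias ranges pass the overlap test>)
       <copy of the loop, to be vectorized>
     else
       <original scalar loop>
*/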
void
vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
                      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block condition_bb;
  gimple_stmt_iterator gsi, cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gimple orig_phi, new_phi;
  tree arg;
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  int min_profitable_iters = 0;
  unsigned int th;

  /* Get profitability threshold for vectorized loop.  */
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

  th = conservative_cost_threshold (loop_vinfo,
                                    min_profitable_iters);

  *cond_expr =
    fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));

  *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
                                     false, NULL_TREE);

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
                                       cond_expr_stmt_list);

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
                                       cond_expr_stmt_list);

  *cond_expr =
    fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
  *cond_expr =
    force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
  gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
  /* If we only needed the extra conditions and a new loop copy
     is not actually required, we are done here.  */
  if (!do_versioning)
    return;

  initialize_original_copy_tables ();
  loop_version (loop, *cond_expr, &condition_bb,
                prob, prob, REG_BR_PROB_BASE - prob, true);
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.  */

  merge_bb = single_exit (loop)->dest;
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
  new_exit_bb = split_edge (single_exit (loop));
  new_exit_e = single_exit (loop);
  e = EDGE_SUCC (new_exit_bb, 0);

  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      orig_phi = gsi_stmt (gsi);
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_exit_bb);
      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      add_phi_arg (new_phi, arg, new_exit_e,
                   gimple_phi_arg_location_from_edge (orig_phi, e));
      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
    }

  /* End loop-exit-fixes after versioning.  */

  update_ssa (TODO_update_ssa);
  if (*cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
                             GSI_SAME_STMT);
      *cond_expr_stmt_list = NULL;
    }
  *cond_expr = NULL_TREE;
}