1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa-alias.h"
31 #include "internal-fn.h"
32 #include "gimple-expr.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimple-ssa.h"
40 #include "tree-phinodes.h"
41 #include "ssa-iterators.h"
42 #include "stringpool.h"
43 #include "tree-ssanames.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "tree-into-ssa.h"
47 #include "tree-pass.h"
49 #include "diagnostic-core.h"
50 #include "tree-scalar-evolution.h"
51 #include "tree-vectorizer.h"
52 #include "langhooks.h"
54 /*************************************************************************
55 Simple Loop Peeling Utilities
57 Utilities to support loop peeling for vectorization purposes.
58 *************************************************************************/
61 /* Renames the use *OP_P. */
64 rename_use_op (use_operand_p op_p
)
68 if (TREE_CODE (USE_FROM_PTR (op_p
)) != SSA_NAME
)
71 new_name
= get_current_def (USE_FROM_PTR (op_p
));
73 /* Something defined outside of the loop. */
77 /* An ordinary ssa name defined in the loop. */
79 SET_USE (op_p
, new_name
);
83 /* Renames the variables in basic block BB. */
86 rename_variables_in_bb (basic_block bb
)
88 gimple_stmt_iterator gsi
;
94 struct loop
*loop
= bb
->loop_father
;
96 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
98 stmt
= gsi_stmt (gsi
);
99 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_ALL_USES
)
100 rename_use_op (use_p
);
103 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
105 if (!flow_bb_inside_loop_p (loop
, e
->src
))
107 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
108 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi
), e
));
119 /* A stack of values to be adjusted in debug stmts. We have to
120 process them LIFO, so that the closest substitution applies. If we
121 processed them FIFO, without the stack, we might substitute uses
122 with a PHI DEF that would soon become non-dominant, and when we got
123 to the suitable one, it wouldn't have anything to substitute any
125 static vec
<adjust_info
, va_heap
> adjust_vec
;
127 /* Adjust any debug stmts that referenced AI->from values to use the
128 loop-closed AI->to, if the references are dominated by AI->bb and
129 not by the definition of AI->from. */
132 adjust_debug_stmts_now (adjust_info
*ai
)
134 basic_block bbphi
= ai
->bb
;
135 tree orig_def
= ai
->from
;
136 tree new_def
= ai
->to
;
137 imm_use_iterator imm_iter
;
139 basic_block bbdef
= gimple_bb (SSA_NAME_DEF_STMT (orig_def
));
141 gcc_assert (dom_info_available_p (CDI_DOMINATORS
));
143 /* Adjust any debug stmts that held onto non-loop-closed
145 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, orig_def
)
150 if (!is_gimple_debug (stmt
))
153 gcc_assert (gimple_debug_bind_p (stmt
));
155 bbuse
= gimple_bb (stmt
);
158 || dominated_by_p (CDI_DOMINATORS
, bbuse
, bbphi
))
160 || dominated_by_p (CDI_DOMINATORS
, bbuse
, bbdef
)))
163 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
164 SET_USE (use_p
, new_def
);
167 gimple_debug_bind_reset_value (stmt
);
174 /* Adjust debug stmts as scheduled before. */
177 adjust_vec_debug_stmts (void)
179 if (!MAY_HAVE_DEBUG_STMTS
)
182 gcc_assert (adjust_vec
.exists ());
184 while (!adjust_vec
.is_empty ())
186 adjust_debug_stmts_now (&adjust_vec
.last ());
190 adjust_vec
.release ();
193 /* Adjust any debug stmts that referenced FROM values to use the
194 loop-closed TO, if the references are dominated by BB and not by
195 the definition of FROM. If adjust_vec is non-NULL, adjustments
196 will be postponed until adjust_vec_debug_stmts is called. */
199 adjust_debug_stmts (tree from
, tree to
, basic_block bb
)
203 if (MAY_HAVE_DEBUG_STMTS
204 && TREE_CODE (from
) == SSA_NAME
205 && ! SSA_NAME_IS_DEFAULT_DEF (from
)
206 && ! virtual_operand_p (from
))
212 if (adjust_vec
.exists ())
213 adjust_vec
.safe_push (ai
);
215 adjust_debug_stmts_now (&ai
);
219 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
220 to adjust any debug stmts that referenced the old phi arg,
221 presumably non-loop-closed references left over from other
225 adjust_phi_and_debug_stmts (gimple update_phi
, edge e
, tree new_def
)
227 tree orig_def
= PHI_ARG_DEF_FROM_EDGE (update_phi
, e
);
229 SET_PHI_ARG_DEF (update_phi
, e
->dest_idx
, new_def
);
231 if (MAY_HAVE_DEBUG_STMTS
)
232 adjust_debug_stmts (orig_def
, PHI_RESULT (update_phi
),
233 gimple_bb (update_phi
));
237 /* Update PHI nodes for a guard of the LOOP.
240 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
241 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
242 originates from the guard-bb, skips LOOP and reaches the (unique) exit
243 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
244 We denote this bb NEW_MERGE_BB because before the guard code was added
245 it had a single predecessor (the LOOP header), and now it became a merge
246 point of two paths - the path that ends with the LOOP exit-edge, and
247 the path that ends with GUARD_EDGE.
248 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
249 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
251 ===> The CFG before the guard-code was added:
254 if (exit_loop) goto update_bb
255 else goto LOOP_header_bb
258 ==> The CFG after the guard-code was added:
260 if (LOOP_guard_condition) goto new_merge_bb
261 else goto LOOP_header_bb
264 if (exit_loop_condition) goto new_merge_bb
265 else goto LOOP_header_bb
270 ==> The CFG after this function:
272 if (LOOP_guard_condition) goto new_merge_bb
273 else goto LOOP_header_bb
276 if (exit_loop_condition) goto new_exit_bb
277 else goto LOOP_header_bb
284 1. creates and updates the relevant phi nodes to account for the new
285 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
286 1.1. Create phi nodes at NEW_MERGE_BB.
287 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
288 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
289 2. preserves loop-closed-ssa-form by creating the required phi nodes
290 at the exit of LOOP (i.e, in NEW_EXIT_BB).
292 There are two flavors to this function:
294 slpeel_update_phi_nodes_for_guard1:
295 Here the guard controls whether we enter or skip LOOP, where LOOP is a
296 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
297 for variables that have phis in the loop header.
299 slpeel_update_phi_nodes_for_guard2:
300 Here the guard controls whether we enter or skip LOOP, where LOOP is an
301 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
302 for variables that have phis in the loop exit.
304 I.E., the overall structure is:
307 guard1 (goto loop1/merge1_bb)
310 guard2 (goto merge1_bb/merge2_bb)
317 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
318 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
319 that have phis in loop1->header).
321 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
322 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
323 that have phis in next_bb). It also adds some of these phis to
326 slpeel_update_phi_nodes_for_guard1 is always called before
327 slpeel_update_phi_nodes_for_guard2. They are both needed in order
328 to create correct data-flow and loop-closed-ssa-form.
330 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
331 that change between iterations of a loop (and therefore have a phi-node
332 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
333 phis for variables that are used out of the loop (and therefore have
334 loop-closed exit phis). Some variables may be both updated between
335 iterations and used after the loop. This is why in loop1_exit_bb we
336 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
337 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
339 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
340 an original loop. i.e., we have:
343 guard_bb (goto LOOP/new_merge)
349 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
353 guard_bb (goto LOOP/new_merge)
359 The SSA names defined in the original loop have a current
360 reaching definition that records the corresponding new
361 ssa-name used in the new duplicated loop copy.
364 /* Function slpeel_update_phi_nodes_for_guard1
367 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
368 - DEFS - a bitmap of ssa names to mark new names for which we recorded
371 In the context of the overall structure, we have:
374 guard1 (goto loop1/merge1_bb)
377 guard2 (goto merge1_bb/merge2_bb)
384 For each name updated between loop iterations (i.e - for each name that has
385 an entry (loop-header) phi in LOOP) we create a new phi in:
386 1. merge1_bb (to account for the edge from guard1)
387 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
391 slpeel_update_phi_nodes_for_guard1 (edge guard_edge
, struct loop
*loop
,
392 bool is_new_loop
, basic_block
*new_exit_bb
)
394 gimple orig_phi
, new_phi
;
395 gimple update_phi
, update_phi2
;
396 tree guard_arg
, loop_arg
;
397 basic_block new_merge_bb
= guard_edge
->dest
;
398 edge e
= EDGE_SUCC (new_merge_bb
, 0);
399 basic_block update_bb
= e
->dest
;
400 basic_block orig_bb
= loop
->header
;
402 tree current_new_name
;
403 gimple_stmt_iterator gsi_orig
, gsi_update
;
405 /* Create new bb between loop and new_merge_bb. */
406 *new_exit_bb
= split_edge (single_exit (loop
));
408 new_exit_e
= EDGE_SUCC (*new_exit_bb
, 0);
410 for (gsi_orig
= gsi_start_phis (orig_bb
),
411 gsi_update
= gsi_start_phis (update_bb
);
412 !gsi_end_p (gsi_orig
) && !gsi_end_p (gsi_update
);
413 gsi_next (&gsi_orig
), gsi_next (&gsi_update
))
415 source_location loop_locus
, guard_locus
;
417 orig_phi
= gsi_stmt (gsi_orig
);
418 update_phi
= gsi_stmt (gsi_update
);
420 /** 1. Handle new-merge-point phis **/
422 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
423 new_res
= copy_ssa_name (PHI_RESULT (orig_phi
), NULL
);
424 new_phi
= create_phi_node (new_res
, new_merge_bb
);
426 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
427 of LOOP. Set the two phi args in NEW_PHI for these edges: */
428 loop_arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, EDGE_SUCC (loop
->latch
, 0));
429 loop_locus
= gimple_phi_arg_location_from_edge (orig_phi
,
430 EDGE_SUCC (loop
->latch
,
432 guard_arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, loop_preheader_edge (loop
));
434 = gimple_phi_arg_location_from_edge (orig_phi
,
435 loop_preheader_edge (loop
));
437 add_phi_arg (new_phi
, loop_arg
, new_exit_e
, loop_locus
);
438 add_phi_arg (new_phi
, guard_arg
, guard_edge
, guard_locus
);
440 /* 1.3. Update phi in successor block. */
441 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi
, e
) == loop_arg
442 || PHI_ARG_DEF_FROM_EDGE (update_phi
, e
) == guard_arg
);
443 adjust_phi_and_debug_stmts (update_phi
, e
, PHI_RESULT (new_phi
));
444 update_phi2
= new_phi
;
447 /** 2. Handle loop-closed-ssa-form phis **/
449 if (virtual_operand_p (PHI_RESULT (orig_phi
)))
452 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
453 new_res
= copy_ssa_name (PHI_RESULT (orig_phi
), NULL
);
454 new_phi
= create_phi_node (new_res
, *new_exit_bb
);
456 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
457 add_phi_arg (new_phi
, loop_arg
, single_exit (loop
), loop_locus
);
459 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
460 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2
, new_exit_e
) == loop_arg
);
461 adjust_phi_and_debug_stmts (update_phi2
, new_exit_e
,
462 PHI_RESULT (new_phi
));
464 /* 2.4. Record the newly created name with set_current_def.
465 We want to find a name such that
466 name = get_current_def (orig_loop_name)
467 and to set its current definition as follows:
468 set_current_def (name, new_phi_name)
470 If LOOP is a new loop then loop_arg is already the name we're
471 looking for. If LOOP is the original loop, then loop_arg is
472 the orig_loop_name and the relevant name is recorded in its
473 current reaching definition. */
475 current_new_name
= loop_arg
;
478 current_new_name
= get_current_def (loop_arg
);
479 /* current_def is not available only if the variable does not
480 change inside the loop, in which case we also don't care
481 about recording a current_def for it because we won't be
482 trying to create loop-exit-phis for it. */
483 if (!current_new_name
)
486 gcc_assert (get_current_def (current_new_name
) == NULL_TREE
);
488 set_current_def (current_new_name
, PHI_RESULT (new_phi
));
493 /* Function slpeel_update_phi_nodes_for_guard2
496 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
498 In the context of the overall structure, we have:
501 guard1 (goto loop1/merge1_bb)
504 guard2 (goto merge1_bb/merge2_bb)
511 For each name used out side the loop (i.e - for each name that has an exit
512 phi in next_bb) we create a new phi in:
513 1. merge2_bb (to account for the edge from guard_bb)
514 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
515 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
516 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
520 slpeel_update_phi_nodes_for_guard2 (edge guard_edge
, struct loop
*loop
,
521 bool is_new_loop
, basic_block
*new_exit_bb
)
523 gimple orig_phi
, new_phi
;
524 gimple update_phi
, update_phi2
;
525 tree guard_arg
, loop_arg
;
526 basic_block new_merge_bb
= guard_edge
->dest
;
527 edge e
= EDGE_SUCC (new_merge_bb
, 0);
528 basic_block update_bb
= e
->dest
;
530 tree orig_def
, orig_def_new_name
;
531 tree new_name
, new_name2
;
533 gimple_stmt_iterator gsi
;
535 /* Create new bb between loop and new_merge_bb. */
536 *new_exit_bb
= split_edge (single_exit (loop
));
538 new_exit_e
= EDGE_SUCC (*new_exit_bb
, 0);
540 for (gsi
= gsi_start_phis (update_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
543 update_phi
= gsi_stmt (gsi
);
544 orig_phi
= update_phi
;
545 orig_def
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
546 /* This loop-closed-phi actually doesn't represent a use
547 out of the loop - the phi arg is a constant. */
548 if (TREE_CODE (orig_def
) != SSA_NAME
)
550 orig_def_new_name
= get_current_def (orig_def
);
553 /** 1. Handle new-merge-point phis **/
555 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
556 new_res
= copy_ssa_name (PHI_RESULT (orig_phi
), NULL
);
557 new_phi
= create_phi_node (new_res
, new_merge_bb
);
559 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
560 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
562 new_name2
= NULL_TREE
;
563 if (orig_def_new_name
)
565 new_name
= orig_def_new_name
;
566 /* Some variables have both loop-entry-phis and loop-exit-phis.
567 Such variables were given yet newer names by phis placed in
568 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
569 new_name2 = get_current_def (get_current_def (orig_name)). */
570 new_name2
= get_current_def (new_name
);
575 guard_arg
= orig_def
;
580 guard_arg
= new_name
;
584 guard_arg
= new_name2
;
586 add_phi_arg (new_phi
, loop_arg
, new_exit_e
, UNKNOWN_LOCATION
);
587 add_phi_arg (new_phi
, guard_arg
, guard_edge
, UNKNOWN_LOCATION
);
589 /* 1.3. Update phi in successor block. */
590 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi
, e
) == orig_def
);
591 adjust_phi_and_debug_stmts (update_phi
, e
, PHI_RESULT (new_phi
));
592 update_phi2
= new_phi
;
595 /** 2. Handle loop-closed-ssa-form phis **/
597 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
598 new_res
= copy_ssa_name (PHI_RESULT (orig_phi
), NULL
);
599 new_phi
= create_phi_node (new_res
, *new_exit_bb
);
601 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
602 add_phi_arg (new_phi
, loop_arg
, single_exit (loop
), UNKNOWN_LOCATION
);
604 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
605 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2
, new_exit_e
) == loop_arg
);
606 adjust_phi_and_debug_stmts (update_phi2
, new_exit_e
,
607 PHI_RESULT (new_phi
));
610 /** 3. Handle loop-closed-ssa-form phis for first loop **/
612 /* 3.1. Find the relevant names that need an exit-phi in
613 GUARD_BB, i.e. names for which
614 slpeel_update_phi_nodes_for_guard1 had not already created a
615 phi node. This is the case for names that are used outside
616 the loop (and therefore need an exit phi) but are not updated
617 across loop iterations (and therefore don't have a
620 slpeel_update_phi_nodes_for_guard1 is responsible for
621 creating loop-exit phis in GUARD_BB for names that have a
622 loop-header-phi. When such a phi is created we also record
623 the new name in its current definition. If this new name
624 exists, then guard_arg was set to this new name (see 1.2
625 above). Therefore, if guard_arg is not this new name, this
626 is an indication that an exit-phi in GUARD_BB was not yet
627 created, so we take care of it here. */
628 if (guard_arg
== new_name2
)
632 /* 3.2. Generate new phi node in GUARD_BB: */
633 new_res
= copy_ssa_name (PHI_RESULT (orig_phi
), NULL
);
634 new_phi
= create_phi_node (new_res
, guard_edge
->src
);
636 /* 3.3. GUARD_BB has one incoming edge: */
637 gcc_assert (EDGE_COUNT (guard_edge
->src
->preds
) == 1);
638 add_phi_arg (new_phi
, arg
, EDGE_PRED (guard_edge
->src
, 0),
641 /* 3.4. Update phi in successor of GUARD_BB: */
642 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2
, guard_edge
)
644 adjust_phi_and_debug_stmts (update_phi2
, guard_edge
,
645 PHI_RESULT (new_phi
));
650 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
651 that starts at zero, increases by one and its limit is NITERS.
653 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
656 slpeel_make_loop_iterate_ntimes (struct loop
*loop
, tree niters
)
658 tree indx_before_incr
, indx_after_incr
;
661 edge exit_edge
= single_exit (loop
);
662 gimple_stmt_iterator loop_cond_gsi
;
663 gimple_stmt_iterator incr_gsi
;
665 tree init
= build_int_cst (TREE_TYPE (niters
), 0);
666 tree step
= build_int_cst (TREE_TYPE (niters
), 1);
667 source_location loop_loc
;
670 orig_cond
= get_loop_exit_condition (loop
);
671 gcc_assert (orig_cond
);
672 loop_cond_gsi
= gsi_for_stmt (orig_cond
);
674 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
675 create_iv (init
, step
, NULL_TREE
, loop
,
676 &incr_gsi
, insert_after
, &indx_before_incr
, &indx_after_incr
);
678 indx_after_incr
= force_gimple_operand_gsi (&loop_cond_gsi
, indx_after_incr
,
679 true, NULL_TREE
, true,
681 niters
= force_gimple_operand_gsi (&loop_cond_gsi
, niters
, true, NULL_TREE
,
682 true, GSI_SAME_STMT
);
684 code
= (exit_edge
->flags
& EDGE_TRUE_VALUE
) ? GE_EXPR
: LT_EXPR
;
685 cond_stmt
= gimple_build_cond (code
, indx_after_incr
, niters
, NULL_TREE
,
688 gsi_insert_before (&loop_cond_gsi
, cond_stmt
, GSI_SAME_STMT
);
690 /* Remove old loop exit test: */
691 gsi_remove (&loop_cond_gsi
, true);
692 free_stmt_vec_info (orig_cond
);
694 loop_loc
= find_loop_location (loop
);
695 if (dump_enabled_p ())
697 if (LOCATION_LOCUS (loop_loc
) != UNKNOWN_LOCATION
)
698 dump_printf (MSG_NOTE
, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc
),
699 LOCATION_LINE (loop_loc
));
700 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, cond_stmt
, 0);
701 dump_printf (MSG_NOTE
, "\n");
703 loop
->nb_iterations
= niters
;
707 /* Given LOOP this function generates a new copy of it and puts it
708 on E which is either the entry or exit of LOOP. */
711 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop
*loop
, edge e
)
713 struct loop
*new_loop
;
714 basic_block
*new_bbs
, *bbs
;
717 basic_block exit_dest
;
720 exit
= single_exit (loop
);
721 at_exit
= (e
== exit
);
722 if (!at_exit
&& e
!= loop_preheader_edge (loop
))
725 bbs
= XNEWVEC (basic_block
, loop
->num_nodes
+ 1);
726 get_loop_body_with_size (loop
, bbs
, loop
->num_nodes
);
728 /* Check whether duplication is possible. */
729 if (!can_copy_bbs_p (bbs
, loop
->num_nodes
))
735 /* Generate new loop structure. */
736 new_loop
= duplicate_loop (loop
, loop_outer (loop
));
737 duplicate_subloops (loop
, new_loop
);
739 exit_dest
= exit
->dest
;
740 was_imm_dom
= (get_immediate_dominator (CDI_DOMINATORS
,
741 exit_dest
) == loop
->header
?
744 /* Also copy the pre-header, this avoids jumping through hoops to
745 duplicate the loop entry PHI arguments. Create an empty
746 pre-header unconditionally for this. */
747 basic_block preheader
= split_edge (loop_preheader_edge (loop
));
748 edge entry_e
= single_pred_edge (preheader
);
749 bbs
[loop
->num_nodes
] = preheader
;
750 new_bbs
= XNEWVEC (basic_block
, loop
->num_nodes
+ 1);
752 copy_bbs (bbs
, loop
->num_nodes
+ 1, new_bbs
,
753 &exit
, 1, &new_exit
, NULL
,
755 basic_block new_preheader
= new_bbs
[loop
->num_nodes
];
757 add_phi_args_after_copy (new_bbs
, loop
->num_nodes
+ 1, NULL
);
759 if (at_exit
) /* Add the loop copy at exit. */
761 redirect_edge_and_branch_force (e
, new_preheader
);
762 flush_pending_stmts (e
);
763 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
, e
->src
);
765 set_immediate_dominator (CDI_DOMINATORS
, exit_dest
, new_loop
->header
);
767 /* And remove the non-necessary forwarder again. Keep the other
768 one so we have a proper pre-header for the loop at the exit edge. */
769 redirect_edge_pred (single_succ_edge (preheader
), single_pred (preheader
));
770 delete_basic_block (preheader
);
771 set_immediate_dominator (CDI_DOMINATORS
, loop
->header
,
772 loop_preheader_edge (loop
)->src
);
774 else /* Add the copy at entry. */
776 redirect_edge_and_branch_force (entry_e
, new_preheader
);
777 flush_pending_stmts (entry_e
);
778 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
, entry_e
->src
);
780 redirect_edge_and_branch_force (new_exit
, preheader
);
781 flush_pending_stmts (new_exit
);
782 set_immediate_dominator (CDI_DOMINATORS
, preheader
, new_exit
->src
);
784 /* And remove the non-necessary forwarder again. Keep the other
785 one so we have a proper pre-header for the loop at the exit edge. */
786 redirect_edge_pred (single_succ_edge (new_preheader
), single_pred (new_preheader
));
787 delete_basic_block (new_preheader
);
788 set_immediate_dominator (CDI_DOMINATORS
, new_loop
->header
,
789 loop_preheader_edge (new_loop
)->src
);
792 for (unsigned i
= 0; i
< loop
->num_nodes
+1; i
++)
793 rename_variables_in_bb (new_bbs
[i
]);
798 #ifdef ENABLE_CHECKING
799 verify_dominators (CDI_DOMINATORS
);
806 /* Given the condition statement COND, put it as the last statement
807 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
808 Assumes that this is the single exit of the guarded loop.
809 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
812 slpeel_add_loop_guard (basic_block guard_bb
, tree cond
,
813 gimple_seq cond_expr_stmt_list
,
814 basic_block exit_bb
, basic_block dom_bb
,
817 gimple_stmt_iterator gsi
;
820 gimple_seq gimplify_stmt_list
= NULL
;
822 enter_e
= EDGE_SUCC (guard_bb
, 0);
823 enter_e
->flags
&= ~EDGE_FALLTHRU
;
824 enter_e
->flags
|= EDGE_FALSE_VALUE
;
825 gsi
= gsi_last_bb (guard_bb
);
827 cond
= force_gimple_operand_1 (cond
, &gimplify_stmt_list
, is_gimple_condexpr
,
829 if (gimplify_stmt_list
)
830 gimple_seq_add_seq (&cond_expr_stmt_list
, gimplify_stmt_list
);
831 cond_stmt
= gimple_build_cond_from_tree (cond
, NULL_TREE
, NULL_TREE
);
832 if (cond_expr_stmt_list
)
833 gsi_insert_seq_after (&gsi
, cond_expr_stmt_list
, GSI_NEW_STMT
);
835 gsi
= gsi_last_bb (guard_bb
);
836 gsi_insert_after (&gsi
, cond_stmt
, GSI_NEW_STMT
);
838 /* Add new edge to connect guard block to the merge/loop-exit block. */
839 new_e
= make_edge (guard_bb
, exit_bb
, EDGE_TRUE_VALUE
);
841 new_e
->count
= guard_bb
->count
;
842 new_e
->probability
= probability
;
843 new_e
->count
= apply_probability (enter_e
->count
, probability
);
844 enter_e
->count
-= new_e
->count
;
845 enter_e
->probability
= inverse_probability (probability
);
846 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, dom_bb
);
851 /* This function verifies that the following restrictions apply to LOOP:
853 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
854 (3) it is single entry, single exit
855 (4) its exit condition is the last stmt in the header
856 (5) E is the entry/exit edge of LOOP.
860 slpeel_can_duplicate_loop_p (const struct loop
*loop
, const_edge e
)
862 edge exit_e
= single_exit (loop
);
863 edge entry_e
= loop_preheader_edge (loop
);
864 gimple orig_cond
= get_loop_exit_condition (loop
);
865 gimple_stmt_iterator loop_exit_gsi
= gsi_last_bb (exit_e
->src
);
868 /* All loops have an outer scope; the only case loop->outer is NULL is for
869 the function itself. */
870 || !loop_outer (loop
)
871 || loop
->num_nodes
!= 2
872 || !empty_block_p (loop
->latch
)
873 || !single_exit (loop
)
874 /* Verify that new loop exit condition can be trivially modified. */
875 || (!orig_cond
|| orig_cond
!= gsi_stmt (loop_exit_gsi
))
876 || (e
!= exit_e
&& e
!= entry_e
))
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by loop peeling: FIRST_LOOP's single
   exit block must guard SECOND_LOOP, and SECOND_LOOP's preheader must
   merge the paths that run and skip FIRST_LOOP.

   NOTE(review): reconstructed — the extraction dropped the return
   type, braces and the first gcc_assert's continuation; verify
   against the full source.  */
void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
				 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
	of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
		   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
		  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
		      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
916 /* If the run time cost model check determines that vectorization is
917 not profitable and hence scalar loop should be generated then set
918 FIRST_NITERS to prologue peeled iterations. This will allow all the
919 iterations to be executed in the prologue peeled scalar loop. */
922 set_prologue_iterations (basic_block bb_before_first_loop
,
929 basic_block cond_bb
, then_bb
;
930 tree var
, prologue_after_cost_adjust_name
;
931 gimple_stmt_iterator gsi
;
933 edge e_true
, e_false
, e_fallthru
;
935 gimple_seq stmts
= NULL
;
936 tree cost_pre_condition
= NULL_TREE
;
937 tree scalar_loop_iters
=
938 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop
)));
940 e
= single_pred_edge (bb_before_first_loop
);
941 cond_bb
= split_edge (e
);
943 e
= single_pred_edge (bb_before_first_loop
);
944 then_bb
= split_edge (e
);
945 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
947 e_false
= make_single_succ_edge (cond_bb
, bb_before_first_loop
,
949 set_immediate_dominator (CDI_DOMINATORS
, bb_before_first_loop
, cond_bb
);
951 e_true
= EDGE_PRED (then_bb
, 0);
952 e_true
->flags
&= ~EDGE_FALLTHRU
;
953 e_true
->flags
|= EDGE_TRUE_VALUE
;
955 e_true
->probability
= probability
;
956 e_false
->probability
= inverse_probability (probability
);
957 e_true
->count
= apply_probability (cond_bb
->count
, probability
);
958 e_false
->count
= cond_bb
->count
- e_true
->count
;
959 then_bb
->frequency
= EDGE_FREQUENCY (e_true
);
960 then_bb
->count
= e_true
->count
;
962 e_fallthru
= EDGE_SUCC (then_bb
, 0);
963 e_fallthru
->count
= then_bb
->count
;
965 gsi
= gsi_last_bb (cond_bb
);
967 fold_build2 (LE_EXPR
, boolean_type_node
, scalar_loop_iters
,
968 build_int_cst (TREE_TYPE (scalar_loop_iters
), th
));
970 force_gimple_operand_gsi_1 (&gsi
, cost_pre_condition
, is_gimple_condexpr
,
971 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
972 cond_stmt
= gimple_build_cond_from_tree (cost_pre_condition
,
973 NULL_TREE
, NULL_TREE
);
974 gsi_insert_after (&gsi
, cond_stmt
, GSI_NEW_STMT
);
976 var
= create_tmp_var (TREE_TYPE (scalar_loop_iters
),
977 "prologue_after_cost_adjust");
978 prologue_after_cost_adjust_name
=
979 force_gimple_operand (scalar_loop_iters
, &stmts
, false, var
);
981 gsi
= gsi_last_bb (then_bb
);
983 gsi_insert_seq_after (&gsi
, stmts
, GSI_NEW_STMT
);
985 newphi
= create_phi_node (var
, bb_before_first_loop
);
986 add_phi_arg (newphi
, prologue_after_cost_adjust_name
, e_fallthru
,
988 add_phi_arg (newphi
, *first_niters
, e_false
, UNKNOWN_LOCATION
);
990 *first_niters
= PHI_RESULT (newphi
);
993 /* Function slpeel_tree_peel_loop_to_edge.
995 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
996 that is placed on the entry (exit) edge E of LOOP. After this transformation
997 we have two loops one after the other - first-loop iterates FIRST_NITERS
998 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
999 If the cost model indicates that it is profitable to emit a scalar
1000 loop instead of the vector one, then the prolog (epilog) loop will iterate
1001 for the entire unchanged scalar iterations of the loop.
1004 - LOOP: the loop to be peeled.
1005 - E: the exit or entry edge of LOOP.
1006 If it is the entry edge, we peel the first iterations of LOOP. In this
1007 case first-loop is LOOP, and second-loop is the newly created loop.
1008 If it is the exit edge, we peel the last iterations of LOOP. In this
1009 case, first-loop is the newly created loop, and second-loop is LOOP.
1010 - NITERS: the number of iterations that LOOP iterates.
1011 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1012 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1013 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1014 is false, the caller of this function may want to take care of this
1015 (this can be useful if we don't want new stmts added to first-loop).
1016 - TH: cost model profitability threshold of iterations for vectorization.
1017 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1018 during versioning and hence needs to occur during
1019 prologue generation or whether cost model check
1020 has not occurred during prologue generation and hence
1021 needs to occur during epilogue generation.
1022 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1023 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1027 The function returns a pointer to the new loop-copy, or NULL if it failed
1028 to perform the transformation.
1030 The function generates two if-then-else guards: one before the first loop,
1031 and the other before the second loop:
1033 if (FIRST_NITERS == 0) then skip the first loop,
1034 and go directly to the second loop.
1035 The second guard is:
1036 if (FIRST_NITERS == NITERS) then skip the second loop.
1038 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1039 then the generated condition is combined with COND_EXPR and the
1040 statements in COND_EXPR_STMT_LIST are emitted together with it.
1042 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1043 FORNOW the resulting code will not be in loop-closed-ssa form.
1047 slpeel_tree_peel_loop_to_edge (struct loop
*loop
,
1048 edge e
, tree
*first_niters
,
1049 tree niters
, bool update_first_loop_count
,
1050 unsigned int th
, bool check_profitability
,
1051 tree cond_expr
, gimple_seq cond_expr_stmt_list
,
1052 int bound1
, int bound2
)
1054 struct loop
*new_loop
= NULL
, *first_loop
, *second_loop
;
1056 tree pre_condition
= NULL_TREE
;
1057 basic_block bb_before_second_loop
, bb_after_second_loop
;
1058 basic_block bb_before_first_loop
;
1059 basic_block bb_between_loops
;
1060 basic_block new_exit_bb
;
1061 gimple_stmt_iterator gsi
;
1062 edge exit_e
= single_exit (loop
);
1063 source_location loop_loc
;
1064 /* There are many aspects to how likely the first loop is going to be executed.
1065 Without histogram we can't really do good job. Simply set it to
1066 2/3, so the first loop is not reordered to the end of function and
1067 the hot path through stays short. */
1068 int first_guard_probability
= 2 * REG_BR_PROB_BASE
/ 3;
1069 int second_guard_probability
= 2 * REG_BR_PROB_BASE
/ 3;
1070 int probability_of_second_loop
;
1072 if (!slpeel_can_duplicate_loop_p (loop
, e
))
1075 /* We might have a queued need to update virtual SSA form. As we
1076 delete the update SSA machinery below after doing a regular
1077 incremental SSA update during loop copying make sure we don't
1079 ??? Needing to update virtual SSA form by renaming is unfortunate
1080 but not all of the vectorizer code inserting new loads / stores
1081 properly assigns virtual operands to those statements. */
1082 update_ssa (TODO_update_ssa_only_virtuals
);
1084 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1085 in the exit bb and rename all the uses after the loop. This simplifies
1086 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1087 (but normally loop closed SSA form doesn't require virtual PHIs to be
1088 in the same form). Doing this early simplifies the checking what
1089 uses should be renamed. */
1090 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1091 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi
))))
1093 gimple phi
= gsi_stmt (gsi
);
1094 for (gsi
= gsi_start_phis (exit_e
->dest
);
1095 !gsi_end_p (gsi
); gsi_next (&gsi
))
1096 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi
))))
1098 if (gsi_end_p (gsi
))
1100 tree new_vop
= copy_ssa_name (PHI_RESULT (phi
), NULL
);
1101 gimple new_phi
= create_phi_node (new_vop
, exit_e
->dest
);
1102 tree vop
= PHI_ARG_DEF_FROM_EDGE (phi
, EDGE_SUCC (loop
->latch
, 0));
1103 imm_use_iterator imm_iter
;
1105 use_operand_p use_p
;
1107 add_phi_arg (new_phi
, vop
, exit_e
, UNKNOWN_LOCATION
);
1108 gimple_phi_set_result (new_phi
, new_vop
);
1109 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, vop
)
1110 if (stmt
!= new_phi
&& gimple_bb (stmt
) != loop
->header
)
1111 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
1112 SET_USE (use_p
, new_vop
);
1117 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1118 Resulting CFG would be:
1131 if (!(new_loop
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, e
)))
1133 loop_loc
= find_loop_location (loop
);
1134 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1135 "tree_duplicate_loop_to_edge_cfg failed.\n");
1139 if (MAY_HAVE_DEBUG_STMTS
)
1141 gcc_assert (!adjust_vec
.exists ());
1142 adjust_vec
.create (32);
1147 /* NEW_LOOP was placed after LOOP. */
1149 second_loop
= new_loop
;
1153 /* NEW_LOOP was placed before LOOP. */
1154 first_loop
= new_loop
;
1158 /* 2. Add the guard code in one of the following ways:
1160 2.a Add the guard that controls whether the first loop is executed.
1161 This occurs when this function is invoked for prologue or epilogue
1162 generation and when the cost model check can be done at compile time.
1164 Resulting CFG would be:
1166 bb_before_first_loop:
1167 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1174 bb_before_second_loop:
1182 2.b Add the cost model check that allows the prologue
1183 to iterate for the entire unchanged scalar
1184 iterations of the loop in the event that the cost
1185 model indicates that the scalar loop is more
1186 profitable than the vector one. This occurs when
1187 this function is invoked for prologue generation
1188 and the cost model check needs to be done at run
1191 Resulting CFG after prologue peeling would be:
1193 if (scalar_loop_iterations <= th)
1194 FIRST_NITERS = scalar_loop_iterations
1196 bb_before_first_loop:
1197 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1204 bb_before_second_loop:
1212 2.c Add the cost model check that allows the epilogue
1213 to iterate for the entire unchanged scalar
1214 iterations of the loop in the event that the cost
1215 model indicates that the scalar loop is more
1216 profitable than the vector one. This occurs when
1217 this function is invoked for epilogue generation
1218 and the cost model check needs to be done at run
1219 time. This check is combined with any pre-existing
1220 check in COND_EXPR to avoid versioning.
1222 Resulting CFG after prologue peeling would be:
1224 bb_before_first_loop:
1225 if ((scalar_loop_iterations <= th)
1227 FIRST_NITERS == 0) GOTO bb_before_second_loop
1234 bb_before_second_loop:
1243 bb_before_first_loop
= split_edge (loop_preheader_edge (first_loop
));
1244 /* Loop copying insterted a forwarder block for us here. */
1245 bb_before_second_loop
= single_exit (first_loop
)->dest
;
1247 probability_of_second_loop
= (inverse_probability (first_guard_probability
)
1248 + combine_probabilities (second_guard_probability
,
1249 first_guard_probability
));
1250 /* Theoretically preheader edge of first loop and exit edge should have
1251 same frequencies. Loop exit probablities are however easy to get wrong.
1252 It is safer to copy value from original loop entry. */
1253 bb_before_second_loop
->frequency
1254 = combine_probabilities (bb_before_first_loop
->frequency
,
1255 probability_of_second_loop
);
1256 bb_before_second_loop
->count
1257 = apply_probability (bb_before_first_loop
->count
,
1258 probability_of_second_loop
);
1259 single_succ_edge (bb_before_second_loop
)->count
1260 = bb_before_second_loop
->count
;
1262 /* Epilogue peeling. */
1263 if (!update_first_loop_count
)
1265 loop_vec_info loop_vinfo
= loop_vec_info_for_loop (loop
);
1266 tree scalar_loop_iters
= LOOP_VINFO_NITERSM1 (loop_vinfo
);
1267 unsigned limit
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - 1;
1268 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
))
1270 if (check_profitability
1274 fold_build2 (LT_EXPR
, boolean_type_node
, scalar_loop_iters
,
1275 build_int_cst (TREE_TYPE (scalar_loop_iters
), limit
));
1279 fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
1281 fold_build1 (TRUTH_NOT_EXPR
, boolean_type_node
,
1286 /* Prologue peeling. */
1289 if (check_profitability
)
1290 set_prologue_iterations (bb_before_first_loop
, first_niters
,
1291 loop
, th
, first_guard_probability
);
1294 fold_build2 (LE_EXPR
, boolean_type_node
, *first_niters
,
1295 build_int_cst (TREE_TYPE (*first_niters
), 0));
1298 skip_e
= slpeel_add_loop_guard (bb_before_first_loop
, pre_condition
,
1299 cond_expr_stmt_list
,
1300 bb_before_second_loop
, bb_before_first_loop
,
1301 inverse_probability (first_guard_probability
));
1302 scale_loop_profile (first_loop
, first_guard_probability
,
1303 check_profitability
&& (int)th
> bound1
? th
: bound1
);
1304 slpeel_update_phi_nodes_for_guard1 (skip_e
, first_loop
,
1305 first_loop
== new_loop
,
1309 /* 3. Add the guard that controls whether the second loop is executed.
1310 Resulting CFG would be:
1312 bb_before_first_loop:
1313 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1321 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1322 GOTO bb_before_second_loop
1324 bb_before_second_loop:
1330 bb_after_second_loop:
1335 bb_between_loops
= new_exit_bb
;
1336 bb_after_second_loop
= split_edge (single_exit (second_loop
));
1339 fold_build2 (EQ_EXPR
, boolean_type_node
, *first_niters
, niters
);
1340 skip_e
= slpeel_add_loop_guard (bb_between_loops
, pre_condition
, NULL
,
1341 bb_after_second_loop
, bb_before_first_loop
,
1342 inverse_probability (second_guard_probability
));
1343 scale_loop_profile (second_loop
, probability_of_second_loop
, bound2
);
1344 slpeel_update_phi_nodes_for_guard2 (skip_e
, second_loop
,
1345 second_loop
== new_loop
, &new_exit_bb
);
1347 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1349 if (update_first_loop_count
)
1350 slpeel_make_loop_iterate_ntimes (first_loop
, *first_niters
);
1352 delete_update_ssa ();
1354 adjust_vec_debug_stmts ();
1359 /* Function vect_get_loop_location.
1361 Extract the location of the loop in the source code.
1362 If the loop is not well formed for vectorization, an estimated
1363 location is calculated.
1364 Return the loop location if succeed and NULL if not. */
/* find_loop_location: return the source location of LOOP's exit condition
   if it has a real (non-builtin) location; otherwise scan the loop header
   block for the first located statement; otherwise UNKNOWN_LOCATION.
   NOTE(review): lossy extraction — the return type, several guard `if`s,
   local declarations and braces are missing from this chunk.  Code left
   byte-identical.  */
1367 find_loop_location (struct loop
*loop
)
1371 gimple_stmt_iterator si
;
1374 return UNKNOWN_LOCATION
;
1376 stmt
= get_loop_exit_condition (loop
);
1379 && LOCATION_LOCUS (gimple_location (stmt
)) > BUILTINS_LOCATION
)
1380 return gimple_location (stmt
);
1382 /* If we got here the loop is probably not "well formed",
1383 try to estimate the loop location */
1386 return UNKNOWN_LOCATION
;
1390 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1392 stmt
= gsi_stmt (si
);
1393 if (LOCATION_LOCUS (gimple_location (stmt
)) > BUILTINS_LOCATION
)
1394 return gimple_location (stmt
);
1397 return UNKNOWN_LOCATION
;
1401 /* Function vect_can_advance_ivs_p
1403 In case the number of iterations that LOOP iterates is unknown at compile
1404 time, an epilog loop will be generated, and the loop induction variables
1405 (IVs) will be "advanced" to the value they are supposed to take just before
1406 the epilog loop. Here we check that the access function of the loop IVs
1407 and the expression that represents the loop bound are simple enough.
1408 These restrictions will be relaxed in the future. */
/* vect_can_advance_ivs_p: walk the PHIs of LOOP's header and check that
   every non-virtual, non-reduction induction variable has a recorded,
   non-chrec evolution part, so its value after the loop can be computed
   for the epilogue (see comment above).
   NOTE(review): lossy extraction — the boolean return statements and some
   braces are missing from this chunk.  Code left byte-identical.  */
1411 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
1413 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1414 basic_block bb
= loop
->header
;
1416 gimple_stmt_iterator gsi
;
1418 /* Analyze phi functions of the loop header. */
1420 if (dump_enabled_p ())
1421 dump_printf_loc (MSG_NOTE
, vect_location
, "vect_can_advance_ivs_p:\n");
1422 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1424 tree evolution_part
;
1426 phi
= gsi_stmt (gsi
);
1427 if (dump_enabled_p ())
1429 dump_printf_loc (MSG_NOTE
, vect_location
, "Analyze phi: ");
1430 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
1431 dump_printf (MSG_NOTE
, "\n");
1434 /* Skip virtual phi's. The data dependences that are associated with
1435 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1437 if (virtual_operand_p (PHI_RESULT (phi
)))
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1441 "virtual phi. skip.\n");
1445 /* Skip reduction phis. */
1447 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
1449 if (dump_enabled_p ())
1450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1451 "reduc phi. skip.\n");
1455 /* Analyze the evolution function. */
1458 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi
));
1459 if (evolution_part
== NULL_TREE
)
1461 if (dump_enabled_p ())
1462 dump_printf (MSG_MISSED_OPTIMIZATION
,
1463 "No access function or evolution.\n");
1467 /* FORNOW: We do not transform initial conditions of IVs
1468 which evolution functions are a polynomial of degree >= 2. */
1470 if (tree_is_chrec (evolution_part
))
1478 /* Function vect_update_ivs_after_vectorizer.
1480 "Advance" the induction variables of LOOP to the value they should take
1481 after the execution of LOOP. This is currently necessary because the
1482 vectorizer does not handle induction variables that are used after the
1483 loop. Such a situation occurs when the last iterations of LOOP are
1485 1. We introduced new uses after LOOP for IVs that were not originally used
1486 after LOOP: the IVs of LOOP are now used by an epilog loop.
1487 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1488 times, whereas the loop IVs should be bumped N times.
1491 - LOOP - a loop that is going to be vectorized. The last few iterations
1492 of LOOP were peeled.
1493 - NITERS - the number of iterations that LOOP executes (before it is
1494 vectorized). i.e, the number of times the ivs should be bumped.
1495 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1496 coming out from LOOP on which there are uses of the LOOP ivs
1497 (this is the path from LOOP->exit to epilog_loop->preheader).
1499 The new definitions of the ivs are placed in LOOP->exit.
1500 The phi args associated with the edge UPDATE_E in the bb
1501 UPDATE_E->dest are updated accordingly.
1503 Assumption 1: Like the rest of the vectorizer, this function assumes
1504 a single loop exit that has a single predecessor.
1506 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1507 organized in the same order.
1509 Assumption 3: The access function of the ivs is simple enough (see
1510 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1512 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1513 coming out of LOOP on which the ivs of LOOP are used (this is the path
1514 that leads to the epilog loop; other paths skip the epilog loop). This
1515 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1516 needs to have its phis updated.
/* vect_update_ivs_after_vectorizer: "advance" LOOP's IVs by NITERS steps —
   for each non-virtual, non-reduction header PHI compute
   init + niters * step (pointer-plus for pointer IVs), materialize it at
   the end of the exit bb, and rewrite the matching PHI arg on UPDATE_E
   (see the function comment above for the assumptions).
   NOTE(review): lossy extraction — some declarations (phi/phi1/type/
   init_expr), the step operand of the MULT_EXPR, braces and `continue`s
   are missing from this chunk.  Code left byte-identical.  */
1520 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
, tree niters
,
1523 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1524 basic_block exit_bb
= single_exit (loop
)->dest
;
1526 gimple_stmt_iterator gsi
, gsi1
;
1527 basic_block update_bb
= update_e
->dest
;
1529 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo
));
1531 /* Make sure there exists a single-predecessor exit bb: */
1532 gcc_assert (single_pred_p (exit_bb
));
1534 for (gsi
= gsi_start_phis (loop
->header
), gsi1
= gsi_start_phis (update_bb
);
1535 !gsi_end_p (gsi
) && !gsi_end_p (gsi1
);
1536 gsi_next (&gsi
), gsi_next (&gsi1
))
1539 tree step_expr
, off
;
1541 tree var
, ni
, ni_name
;
1542 gimple_stmt_iterator last_gsi
;
1543 stmt_vec_info stmt_info
;
1545 phi
= gsi_stmt (gsi
);
1546 phi1
= gsi_stmt (gsi1
);
1547 if (dump_enabled_p ())
1549 dump_printf_loc (MSG_NOTE
, vect_location
,
1550 "vect_update_ivs_after_vectorizer: phi: ");
1551 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
1552 dump_printf (MSG_NOTE
, "\n");
1555 /* Skip virtual phi's. */
1556 if (virtual_operand_p (PHI_RESULT (phi
)))
1558 if (dump_enabled_p ())
1559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1560 "virtual phi. skip.\n");
1564 /* Skip reduction phis. */
1565 stmt_info
= vinfo_for_stmt (phi
);
1566 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
1568 if (dump_enabled_p ())
1569 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1570 "reduc phi. skip.\n");
1574 type
= TREE_TYPE (gimple_phi_result (phi
));
1575 step_expr
= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
);
1576 step_expr
= unshare_expr (step_expr
);
1578 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1579 of degree >= 2 or exponential. */
1580 gcc_assert (!tree_is_chrec (step_expr
));
1582 init_expr
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1584 off
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
1585 fold_convert (TREE_TYPE (step_expr
), niters
),
1587 if (POINTER_TYPE_P (type
))
1588 ni
= fold_build_pointer_plus (init_expr
, off
);
1590 ni
= fold_build2 (PLUS_EXPR
, type
,
1591 init_expr
, fold_convert (type
, off
));
1593 var
= create_tmp_var (type
, "tmp");
1595 last_gsi
= gsi_last_bb (exit_bb
);
1596 ni_name
= force_gimple_operand_gsi (&last_gsi
, ni
, false, var
,
1597 true, GSI_SAME_STMT
);
1599 /* Fix phi expressions in the successor bb. */
1600 adjust_phi_and_debug_stmts (phi1
, update_e
, ni_name
);
1604 /* Function vect_do_peeling_for_loop_bound
1606 Peel the last iterations of the loop represented by LOOP_VINFO.
1607 The peeled iterations form a new epilog loop. Given that the loop now
1608 iterates NITERS times, the new epilog loop iterates
1609 NITERS % VECTORIZATION_FACTOR times.
1611 The original loop will later be made to iterate
1612 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1614 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
/* vect_do_peeling_for_loop_bound: peel the last NITERS % VF iterations of
   the loop into an epilogue via slpeel_tree_peel_loop_to_edge, advance the
   original loop's IVs by ratio_mult_vf_name, and record an upper bound on
   the epilogue's iteration count (doubled when PEELING_FOR_GAPS — see the
   comment below).
   NOTE(review): lossy extraction — declarations of loop_num/update_e/
   max_iter, an `else`, `#endif` and scev_reset are among the missing
   lines.  Code left byte-identical.  */
1618 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo
,
1619 tree ni_name
, tree ratio_mult_vf_name
,
1620 unsigned int th
, bool check_profitability
)
1622 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1623 struct loop
*new_loop
;
1625 basic_block preheader
;
1628 tree cond_expr
= NULL_TREE
;
1629 gimple_seq cond_expr_stmt_list
= NULL
;
1631 if (dump_enabled_p ())
1632 dump_printf_loc (MSG_NOTE
, vect_location
,
1633 "=== vect_do_peeling_for_loop_bound ===\n");
1635 initialize_original_copy_tables ();
1637 loop_num
= loop
->num
;
1639 new_loop
= slpeel_tree_peel_loop_to_edge (loop
, single_exit (loop
),
1640 &ratio_mult_vf_name
, ni_name
, false,
1641 th
, check_profitability
,
1642 cond_expr
, cond_expr_stmt_list
,
1643 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
1644 gcc_assert (new_loop
);
1645 gcc_assert (loop_num
== loop
->num
);
1646 #ifdef ENABLE_CHECKING
1647 slpeel_verify_cfg_after_peeling (loop
, new_loop
);
1650 /* A guard that controls whether the new_loop is to be executed or skipped
1651 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1652 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1653 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1654 is on the path where the LOOP IVs are used and need to be updated. */
1656 preheader
= loop_preheader_edge (new_loop
)->src
;
1657 if (EDGE_PRED (preheader
, 0)->src
== single_exit (loop
)->dest
)
1658 update_e
= EDGE_PRED (preheader
, 0);
1660 update_e
= EDGE_PRED (preheader
, 1);
1662 /* Update IVs of original loop as if they were advanced
1663 by ratio_mult_vf_name steps. */
1664 vect_update_ivs_after_vectorizer (loop_vinfo
, ratio_mult_vf_name
, update_e
);
1666 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1667 and this means N-2 loopback edge executions.
1669 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1670 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1671 max_iter
= (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
1672 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo
) * 2
1673 : LOOP_VINFO_VECT_FACTOR (loop_vinfo
)) - 2;
1674 if (check_profitability
)
1675 max_iter
= MAX (max_iter
, (int) th
- 1);
1676 record_niter_bound (new_loop
, double_int::from_shwi (max_iter
), false, true);
1677 dump_printf (MSG_NOTE
,
1678 "Setting upper bound of nb iterations for epilogue "
1679 "loop to %d\n", max_iter
);
1681 /* After peeling we have to reset scalar evolution analyzer. */
1684 free_original_copy_tables ();
1688 /* Function vect_gen_niters_for_prolog_loop
1690 Set the number of iterations for the loop represented by LOOP_VINFO
1691 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1692 and the misalignment of DR - the data reference recorded in
1693 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1694 this loop, the data reference DR will refer to an aligned location.
1696 The following computation is generated:
1698 If the misalignment of DR is known at compile time:
1699 addr_mis = int mis = DR_MISALIGNMENT (dr);
1700 Else, compute address misalignment in bytes:
1701 addr_mis = addr & (vectype_align - 1)
1703 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1705 (elem_size = element type size; an element is the scalar element whose type
1706 is the inner type of the vectype)
1708 When the step of the data-ref in the loop is not 1 (as in interleaved data
1709 and SLP), the number of iterations of the prolog must be divided by the step
1710 (which is equal to the size of interleaved group).
1712 The above formulas assume that VF == number of elements in the vector. This
1713 may not hold when there are multiple-types in the loop.
1714 In this case, for some data-references in the loop the VF does not represent
1715 the number of elements that fit in the vector. Therefore, instead of VF we
1716 use TYPE_VECTOR_SUBPARTS. */
/* vect_gen_niters_for_prolog_loop: compute the prologue iteration count
   that aligns LOOP_VINFO_UNALIGNED_DR — either the compile-time npeel, or
   the run-time expression ((VF - addr_mis/elem_size) & (VF-1)) combined
   with MIN (…, loop_niters), inserted on the preheader edge (see the
   comment above for the formula).
   NOTE(review): lossy extraction — the `else` branch structure, some
   assignments (byte_misalign/elem_misalign targets, *bound in the run-time
   branch), the negative-step variant selection, stmts declaration, and the
   final return are missing.  Code left byte-identical.  */
1719 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo
, tree loop_niters
, int *bound
)
1721 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
1722 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1725 tree iters
, iters_name
;
1728 gimple dr_stmt
= DR_STMT (dr
);
1729 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
1730 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1731 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
1732 tree niters_type
= TREE_TYPE (loop_niters
);
1733 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1735 pe
= loop_preheader_edge (loop
);
1737 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
1739 int npeel
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
1741 if (dump_enabled_p ())
1742 dump_printf_loc (MSG_NOTE
, vect_location
,
1743 "known peeling = %d.\n", npeel
);
1745 iters
= build_int_cst (niters_type
, npeel
);
1746 *bound
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
1750 gimple_seq new_stmts
= NULL
;
1751 bool negative
= tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0;
1752 tree offset
= negative
1753 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1) : NULL_TREE
;
1754 tree start_addr
= vect_create_addr_base_for_vector_ref (dr_stmt
,
1755 &new_stmts
, offset
, loop
);
1756 tree type
= unsigned_type_for (TREE_TYPE (start_addr
));
1757 tree vectype_align_minus_1
= build_int_cst (type
, vectype_align
- 1);
1758 HOST_WIDE_INT elem_size
=
1759 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
1760 tree elem_size_log
= build_int_cst (type
, exact_log2 (elem_size
));
1761 tree nelements_minus_1
= build_int_cst (type
, nelements
- 1);
1762 tree nelements_tree
= build_int_cst (type
, nelements
);
1766 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmts
);
1767 gcc_assert (!new_bb
);
1769 /* Create: byte_misalign = addr & (vectype_align - 1) */
1771 fold_build2 (BIT_AND_EXPR
, type
, fold_convert (type
, start_addr
),
1772 vectype_align_minus_1
);
1774 /* Create: elem_misalign = byte_misalign / element_size */
1776 fold_build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
1778 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
1780 iters
= fold_build2 (MINUS_EXPR
, type
, elem_misalign
, nelements_tree
);
1782 iters
= fold_build2 (MINUS_EXPR
, type
, nelements_tree
, elem_misalign
);
1783 iters
= fold_build2 (BIT_AND_EXPR
, type
, iters
, nelements_minus_1
);
1784 iters
= fold_convert (niters_type
, iters
);
1788 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1789 /* If the loop bound is known at compile time we already verified that it is
1790 greater than vf; since the misalignment ('iters') is at most vf, there's
1791 no need to generate the MIN_EXPR in this case. */
1792 if (TREE_CODE (loop_niters
) != INTEGER_CST
)
1793 iters
= fold_build2 (MIN_EXPR
, niters_type
, iters
, loop_niters
);
1795 if (dump_enabled_p ())
1797 dump_printf_loc (MSG_NOTE
, vect_location
,
1798 "niters for prolog loop: ");
1799 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, iters
);
1800 dump_printf (MSG_NOTE
, "\n");
1803 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
1805 iters_name
= force_gimple_operand (iters
, &stmts
, false, var
);
1807 /* Insert stmt on loop preheader edge. */
1810 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
1811 gcc_assert (!new_bb
);
1818 /* Function vect_update_init_of_dr
1820 NITERS iterations were peeled from LOOP. DR represents a data reference
1821 in LOOP. This function updates the information recorded in DR to
1822 account for the fact that the first NITERS iterations had already been
1823 executed. Specifically, it updates the OFFSET field of DR. */
/* vect_update_init_of_dr: account for NITERS peeled iterations by adding
   niters * DR_STEP to DR's offset (all arithmetic in sizetype).
   NOTE(review): lossy extraction — braces are missing; statements appear
   complete.  Code left byte-identical.  */
1826 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
1828 tree offset
= DR_OFFSET (dr
);
1830 niters
= fold_build2 (MULT_EXPR
, sizetype
,
1831 fold_convert (sizetype
, niters
),
1832 fold_convert (sizetype
, DR_STEP (dr
)));
1833 offset
= fold_build2 (PLUS_EXPR
, sizetype
,
1834 fold_convert (sizetype
, offset
), niters
);
1835 DR_OFFSET (dr
) = offset
;
1839 /* Function vect_update_inits_of_drs
1841 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1842 This function updates the information recorded for the data references in
1843 the loop to account for the fact that the first NITERS iterations had
1844 already been executed. Specifically, it updates the initial_condition of
1845 the access_function of all the data_references in the loop. */
/* vect_update_inits_of_drs: apply vect_update_init_of_dr to every data
   reference recorded in LOOP_VINFO, after NITERS iterations were peeled.
   NOTE(review): lossy extraction — the index variable declaration and
   braces are missing.  Code left byte-identical.  */
1848 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
1851 vec
<data_reference_p
> datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1852 struct data_reference
*dr
;
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_NOTE
, vect_location
,
1856 "=== vect_update_inits_of_dr ===\n");
1858 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1859 vect_update_init_of_dr (dr
, niters
);
1863 /* Function vect_do_peeling_for_alignment
1865 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1866 'niters' is set to the misalignment of one of the data references in the
1867 loop, thereby forcing it to refer to an aligned location at the beginning
1868 of the execution of this loop. The data reference for which we are
1869 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
/* vect_do_peeling_for_alignment: peel the first niters_of_prolog_loop
   iterations (computed by vect_gen_niters_for_prolog_loop) so that
   LOOP_VINFO_UNALIGNED_DR becomes aligned; then shrink the recorded
   NITERS/NITERSM1 and bump every data ref's init by the peeled count.
   NOTE(review): lossy extraction — the bound variable, max_iter
   declaration, the new_loop assignment target, `#endif`, the `else`
   introducing the widening branch, and scev_reset are among the missing
   lines.  Code left byte-identical.  */
1872 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo
, tree ni_name
,
1873 unsigned int th
, bool check_profitability
)
1875 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1876 tree niters_of_prolog_loop
;
1877 tree wide_prolog_niters
;
1878 struct loop
*new_loop
;
1882 if (dump_enabled_p ())
1883 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
1884 "loop peeled for vectorization to enhance"
1887 initialize_original_copy_tables ();
1889 gimple_seq stmts
= NULL
;
1890 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1891 niters_of_prolog_loop
= vect_gen_niters_for_prolog_loop (loop_vinfo
,
1895 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1897 slpeel_tree_peel_loop_to_edge (loop
, loop_preheader_edge (loop
),
1898 &niters_of_prolog_loop
, ni_name
, true,
1899 th
, check_profitability
, NULL_TREE
, NULL
,
1903 gcc_assert (new_loop
);
1904 #ifdef ENABLE_CHECKING
1905 slpeel_verify_cfg_after_peeling (new_loop
, loop
);
1907 /* For vectorization factor N, we need to copy at most N-1 values
1908 for alignment and this means N-2 loopback edge executions. */
1909 max_iter
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - 2;
1910 if (check_profitability
)
1911 max_iter
= MAX (max_iter
, (int) th
- 1);
1912 record_niter_bound (new_loop
, double_int::from_shwi (max_iter
), false, true);
1913 dump_printf (MSG_NOTE
,
1914 "Setting upper bound of nb iterations for prologue "
1915 "loop to %d\n", max_iter
);
1917 /* Update number of times loop executes. */
1918 LOOP_VINFO_NITERS (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
1919 TREE_TYPE (ni_name
), ni_name
, niters_of_prolog_loop
);
1920 LOOP_VINFO_NITERSM1 (loop_vinfo
) = fold_build2 (MINUS_EXPR
,
1921 TREE_TYPE (ni_name
),
1922 LOOP_VINFO_NITERSM1 (loop_vinfo
), niters_of_prolog_loop
);
1924 if (types_compatible_p (sizetype
, TREE_TYPE (niters_of_prolog_loop
)))
1925 wide_prolog_niters
= niters_of_prolog_loop
;
1928 gimple_seq seq
= NULL
;
1929 edge pe
= loop_preheader_edge (loop
);
1930 tree wide_iters
= fold_convert (sizetype
, niters_of_prolog_loop
);
1931 tree var
= create_tmp_var (sizetype
, "prolog_loop_adjusted_niters");
1932 wide_prolog_niters
= force_gimple_operand (wide_iters
, &seq
, false,
1936 /* Insert stmt on loop preheader edge. */
1937 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1938 gcc_assert (!new_bb
);
1942 /* Update the init conditions of the access functions of all data refs. */
1943 vect_update_inits_of_drs (loop_vinfo
, wide_prolog_niters
);
1945 /* After peeling we have to reset scalar evolution analyzer. */
1948 free_original_copy_tables ();
1952 /* Function vect_create_cond_for_align_checks.
1954 Create a conditional expression that represents the alignment checks for
1955 all of data references (array element references) whose alignment must be
1959 COND_EXPR - input conditional expression. New conditions will be chained
1960 with logical AND operation.
1961 LOOP_VINFO - two fields of the loop information are used.
1962 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1963 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1966 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1968 The returned value is the conditional expression to be used in the if
1969 statement that controls which version of the loop gets executed at runtime.
1971 The algorithm makes two assumptions:
1972 1) The number of bytes "n" in a vector is a power of 2.
1973 2) An address "a" is aligned if a%n is zero and that this
1974 test can be done as a&(n-1) == 0. For example, for 16
1975 byte vectors the test is a&0xf == 0. */
/* vect_create_cond_for_align_checks: build the run-time alignment test
   ((addr_1 | ... | addr_n) & mask) == 0 for all may-misalign refs,
   emitting the address computations into COND_EXPR_STMT_LIST and ANDing
   the result onto *COND_EXPR (see comment above for the contract).
   NOTE(review): lossy extraction — declarations of i/ref_stmt/tmp_name/
   addr_base/addr_tmp_name/and_tmp_name/mask_cst/ptrsize_zero, the
   addr_base assignment, `else` keywords and braces are missing.  Code
   left byte-identical.  */
1978 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
1980 gimple_seq
*cond_expr_stmt_list
)
1982 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1983 vec
<gimple
> may_misalign_stmts
1984 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1986 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
1989 tree int_ptrsize_type
;
1991 tree or_tmp_name
= NULL_TREE
;
1995 tree part_cond_expr
;
1997 /* Check that mask is one less than a power of 2, i.e., mask is
1998 all zeros followed by all ones. */
1999 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
2001 int_ptrsize_type
= signed_type_for (ptr_type_node
);
2003 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2004 of the first vector of the i'th data reference. */
2006 FOR_EACH_VEC_ELT (may_misalign_stmts
, i
, ref_stmt
)
2008 gimple_seq new_stmt_list
= NULL
;
2011 tree new_or_tmp_name
;
2012 gimple addr_stmt
, or_stmt
;
2013 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (ref_stmt
);
2014 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
2015 bool negative
= tree_int_cst_compare
2016 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo
)), size_zero_node
) < 0;
2017 tree offset
= negative
2018 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1) : NULL_TREE
;
2020 /* create: addr_tmp = (int)(address_of_first_vector) */
2022 vect_create_addr_base_for_vector_ref (ref_stmt
, &new_stmt_list
,
2024 if (new_stmt_list
!= NULL
)
2025 gimple_seq_add_seq (cond_expr_stmt_list
, new_stmt_list
);
2027 sprintf (tmp_name
, "addr2int%d", i
);
2028 addr_tmp_name
= make_temp_ssa_name (int_ptrsize_type
, NULL
, tmp_name
);
2029 addr_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, addr_tmp_name
,
2030 addr_base
, NULL_TREE
);
2031 gimple_seq_add_stmt (cond_expr_stmt_list
, addr_stmt
);
2033 /* The addresses are OR together. */
2035 if (or_tmp_name
!= NULL_TREE
)
2037 /* create: or_tmp = or_tmp | addr_tmp */
2038 sprintf (tmp_name
, "orptrs%d", i
);
2039 new_or_tmp_name
= make_temp_ssa_name (int_ptrsize_type
, NULL
, tmp_name
);
2040 or_stmt
= gimple_build_assign_with_ops (BIT_IOR_EXPR
,
2042 or_tmp_name
, addr_tmp_name
);
2043 gimple_seq_add_stmt (cond_expr_stmt_list
, or_stmt
);
2044 or_tmp_name
= new_or_tmp_name
;
2047 or_tmp_name
= addr_tmp_name
;
2051 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
2053 /* create: and_tmp = or_tmp & mask */
2054 and_tmp_name
= make_temp_ssa_name (int_ptrsize_type
, NULL
, "andmask");
2056 and_stmt
= gimple_build_assign_with_ops (BIT_AND_EXPR
, and_tmp_name
,
2057 or_tmp_name
, mask_cst
);
2058 gimple_seq_add_stmt (cond_expr_stmt_list
, and_stmt
);
2060 /* Make and_tmp the left operand of the conditional test against zero.
2061 if and_tmp has a nonzero bit then some address is unaligned. */
2062 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
2063 part_cond_expr
= fold_build2 (EQ_EXPR
, boolean_type_node
,
2064 and_tmp_name
, ptrsize_zero
);
2066 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2067 *cond_expr
, part_cond_expr
);
2069 *cond_expr
= part_cond_expr
;
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.  If it is NULL, then the function
                is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
                to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */
2093 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo
, tree
* cond_expr
)
2095 vec
<dr_with_seg_len_pair_t
> comp_alias_ddrs
=
2096 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo
);
2097 tree part_cond_expr
;
2099 /* Create expression
2100 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2101 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2105 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2106 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
2108 if (comp_alias_ddrs
.is_empty ())
2111 for (size_t i
= 0, s
= comp_alias_ddrs
.length (); i
< s
; ++i
)
2113 const dr_with_seg_len
& dr_a
= comp_alias_ddrs
[i
].first
;
2114 const dr_with_seg_len
& dr_b
= comp_alias_ddrs
[i
].second
;
2115 tree segment_length_a
= dr_a
.seg_len
;
2116 tree segment_length_b
= dr_b
.seg_len
;
2119 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a
.dr
), dr_a
.offset
);
2121 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b
.dr
), dr_b
.offset
);
2123 if (dump_enabled_p ())
2125 dump_printf_loc (MSG_NOTE
, vect_location
,
2126 "create runtime check for data references ");
2127 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_a
.dr
));
2128 dump_printf (MSG_NOTE
, " and ");
2129 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr_b
.dr
));
2130 dump_printf (MSG_NOTE
, "\n");
2133 tree seg_a_min
= addr_base_a
;
2134 tree seg_a_max
= fold_build_pointer_plus (addr_base_a
, segment_length_a
);
2135 if (tree_int_cst_compare (DR_STEP (dr_a
.dr
), size_zero_node
) < 0)
2136 seg_a_min
= seg_a_max
, seg_a_max
= addr_base_a
;
2138 tree seg_b_min
= addr_base_b
;
2139 tree seg_b_max
= fold_build_pointer_plus (addr_base_b
, segment_length_b
);
2140 if (tree_int_cst_compare (DR_STEP (dr_b
.dr
), size_zero_node
) < 0)
2141 seg_b_min
= seg_b_max
, seg_b_max
= addr_base_b
;
2144 fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
2145 fold_build2 (LE_EXPR
, boolean_type_node
, seg_a_max
, seg_b_min
),
2146 fold_build2 (LE_EXPR
, boolean_type_node
, seg_b_max
, seg_a_min
));
2149 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2150 *cond_expr
, part_cond_expr
);
2152 *cond_expr
= part_cond_expr
;
2155 if (dump_enabled_p ())
2156 dump_printf_loc (MSG_NOTE
, vect_location
,
2157 "created %u versioning for alias checks.\n",
2158 comp_alias_ddrs
.length ());
2160 comp_alias_ddrs
.release ();
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned or/and
   has data reference relations whose independence was not proven then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pairs of DDRs whose
   independence was not proven.  The vectorized version of loop is
   executed only if both alias and alignment tests are passed.

   The test generated to check which version of loop is executed
   is modified to also check for profitability as indicated by the
   cost model initially.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  */
2184 vect_loop_versioning (loop_vec_info loop_vinfo
,
2185 unsigned int th
, bool check_profitability
)
2187 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2188 basic_block condition_bb
;
2189 gimple_stmt_iterator gsi
, cond_exp_gsi
;
2190 basic_block merge_bb
;
2191 basic_block new_exit_bb
;
2193 gimple orig_phi
, new_phi
;
2194 tree cond_expr
= NULL_TREE
;
2195 gimple_seq cond_expr_stmt_list
= NULL
;
2197 unsigned prob
= 4 * REG_BR_PROB_BASE
/ 5;
2198 gimple_seq gimplify_stmt_list
= NULL
;
2199 tree scalar_loop_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
2200 bool version_align
= LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
);
2201 bool version_alias
= LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
);
2203 if (check_profitability
)
2205 cond_expr
= fold_build2 (GT_EXPR
, boolean_type_node
, scalar_loop_iters
,
2206 build_int_cst (TREE_TYPE (scalar_loop_iters
), th
));
2207 cond_expr
= force_gimple_operand_1 (cond_expr
, &cond_expr_stmt_list
,
2208 is_gimple_condexpr
, NULL_TREE
);
2212 vect_create_cond_for_align_checks (loop_vinfo
, &cond_expr
,
2213 &cond_expr_stmt_list
);
2216 vect_create_cond_for_alias_checks (loop_vinfo
, &cond_expr
);
2218 cond_expr
= force_gimple_operand_1 (cond_expr
, &gimplify_stmt_list
,
2219 is_gimple_condexpr
, NULL_TREE
);
2220 gimple_seq_add_seq (&cond_expr_stmt_list
, gimplify_stmt_list
);
2222 initialize_original_copy_tables ();
2223 loop_version (loop
, cond_expr
, &condition_bb
,
2224 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
2226 if (LOCATION_LOCUS (vect_location
) != UNKNOWN_LOCATION
2227 && dump_enabled_p ())
2230 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2231 "loop versioned for vectorization because of "
2232 "possible aliasing\n");
2234 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2235 "loop versioned for vectorization to enhance "
2239 free_original_copy_tables ();
2241 /* Loop versioning violates an assumption we try to maintain during
2242 vectorization - that the loop exit block has a single predecessor.
2243 After versioning, the exit block of both loop versions is the same
2244 basic block (i.e. it has two predecessors). Just in order to simplify
2245 following transformations in the vectorizer, we fix this situation
2246 here by adding a new (empty) block on the exit-edge of the loop,
2247 with the proper loop-exit phis to maintain loop-closed-form. */
2249 merge_bb
= single_exit (loop
)->dest
;
2250 gcc_assert (EDGE_COUNT (merge_bb
->preds
) == 2);
2251 new_exit_bb
= split_edge (single_exit (loop
));
2252 new_exit_e
= single_exit (loop
);
2253 e
= EDGE_SUCC (new_exit_bb
, 0);
2255 for (gsi
= gsi_start_phis (merge_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2258 orig_phi
= gsi_stmt (gsi
);
2259 new_res
= copy_ssa_name (PHI_RESULT (orig_phi
), NULL
);
2260 new_phi
= create_phi_node (new_res
, new_exit_bb
);
2261 arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, e
);
2262 add_phi_arg (new_phi
, arg
, new_exit_e
,
2263 gimple_phi_arg_location_from_edge (orig_phi
, e
));
2264 adjust_phi_and_debug_stmts (orig_phi
, e
, PHI_RESULT (new_phi
));
2268 /* Extract load statements on memrefs with zero-stride accesses. */
2270 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2272 /* In the loop body, we iterate each statement to check if it is a load.
2273 Then we check the DR_STEP of the data reference. If DR_STEP is zero,
2274 then we will hoist the load statement to the loop preheader. */
2276 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2277 int nbbs
= loop
->num_nodes
;
2279 for (int i
= 0; i
< nbbs
; ++i
)
2281 for (gimple_stmt_iterator si
= gsi_start_bb (bbs
[i
]);
2284 gimple stmt
= gsi_stmt (si
);
2285 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2286 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2288 if (is_gimple_assign (stmt
)
2290 || (DR_IS_READ (dr
) && integer_zerop (DR_STEP (dr
)))))
2296 /* We hoist a statement if all SSA uses in it are defined
2297 outside of the loop. */
2298 FOR_EACH_SSA_TREE_OPERAND (var
, stmt
, iter
, SSA_OP_USE
)
2300 gimple def
= SSA_NAME_DEF_STMT (var
);
2301 if (!gimple_nop_p (def
)
2302 && flow_bb_inside_loop_p (loop
, gimple_bb (def
)))
2312 gimple_set_vuse (stmt
, NULL
);
2314 gsi_remove (&si
, false);
2315 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
),
2318 if (dump_enabled_p ())
2321 (MSG_NOTE
, vect_location
,
2322 "hoisting out of the vectorized loop: ");
2323 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2324 dump_printf (MSG_NOTE
, "\n");
2334 /* End loop-exit-fixes after versioning. */
2336 if (cond_expr_stmt_list
)
2338 cond_exp_gsi
= gsi_last_bb (condition_bb
);
2339 gsi_insert_seq_before (&cond_exp_gsi
, cond_expr_stmt_list
,
2342 update_ssa (TODO_update_ssa
);