1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "tree-pass.h"
31 #include "fold-const.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
55 rename_use_op (use_operand_p op_p
)
59 if (TREE_CODE (USE_FROM_PTR (op_p
)) != SSA_NAME
)
62 new_name
= get_current_def (USE_FROM_PTR (op_p
));
64 /* Something defined outside of the loop. */
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p
, new_name
);
74 /* Renames the variables in basic block BB. Allow renaming of PHI argumnets
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
79 rename_variables_in_bb (basic_block bb
, bool rename_from_outer_loop
)
86 struct loop
*loop
= bb
->loop_father
;
87 struct loop
*outer_loop
= NULL
;
89 if (rename_from_outer_loop
)
92 outer_loop
= loop_outer (loop
);
95 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
98 stmt
= gsi_stmt (gsi
);
99 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_ALL_USES
)
100 rename_use_op (use_p
);
103 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
105 if (!flow_bb_inside_loop_p (loop
, e
->src
)
106 && (!rename_from_outer_loop
|| e
->src
!= outer_loop
->header
))
108 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
110 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi
.phi (), e
));
121 /* A stack of values to be adjusted in debug stmts. We have to
122 process them LIFO, so that the closest substitution applies. If we
123 processed them FIFO, without the stack, we might substitute uses
124 with a PHI DEF that would soon become non-dominant, and when we got
125 to the suitable one, it wouldn't have anything to substitute any
127 static vec
<adjust_info
, va_heap
> adjust_vec
;
129 /* Adjust any debug stmts that referenced AI->from values to use the
130 loop-closed AI->to, if the references are dominated by AI->bb and
131 not by the definition of AI->from. */
134 adjust_debug_stmts_now (adjust_info
*ai
)
136 basic_block bbphi
= ai
->bb
;
137 tree orig_def
= ai
->from
;
138 tree new_def
= ai
->to
;
139 imm_use_iterator imm_iter
;
141 basic_block bbdef
= gimple_bb (SSA_NAME_DEF_STMT (orig_def
));
143 gcc_assert (dom_info_available_p (CDI_DOMINATORS
));
145 /* Adjust any debug stmts that held onto non-loop-closed
147 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, orig_def
)
152 if (!is_gimple_debug (stmt
))
155 gcc_assert (gimple_debug_bind_p (stmt
));
157 bbuse
= gimple_bb (stmt
);
160 || dominated_by_p (CDI_DOMINATORS
, bbuse
, bbphi
))
162 || dominated_by_p (CDI_DOMINATORS
, bbuse
, bbdef
)))
165 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
166 SET_USE (use_p
, new_def
);
169 gimple_debug_bind_reset_value (stmt
);
176 /* Adjust debug stmts as scheduled before. */
179 adjust_vec_debug_stmts (void)
181 if (!MAY_HAVE_DEBUG_STMTS
)
184 gcc_assert (adjust_vec
.exists ());
186 while (!adjust_vec
.is_empty ())
188 adjust_debug_stmts_now (&adjust_vec
.last ());
193 /* Adjust any debug stmts that referenced FROM values to use the
194 loop-closed TO, if the references are dominated by BB and not by
195 the definition of FROM. If adjust_vec is non-NULL, adjustments
196 will be postponed until adjust_vec_debug_stmts is called. */
199 adjust_debug_stmts (tree from
, tree to
, basic_block bb
)
203 if (MAY_HAVE_DEBUG_STMTS
204 && TREE_CODE (from
) == SSA_NAME
205 && ! SSA_NAME_IS_DEFAULT_DEF (from
)
206 && ! virtual_operand_p (from
))
212 if (adjust_vec
.exists ())
213 adjust_vec
.safe_push (ai
);
215 adjust_debug_stmts_now (&ai
);
219 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
220 to adjust any debug stmts that referenced the old phi arg,
221 presumably non-loop-closed references left over from other
225 adjust_phi_and_debug_stmts (gimple
*update_phi
, edge e
, tree new_def
)
227 tree orig_def
= PHI_ARG_DEF_FROM_EDGE (update_phi
, e
);
229 SET_PHI_ARG_DEF (update_phi
, e
->dest_idx
, new_def
);
231 if (MAY_HAVE_DEBUG_STMTS
)
232 adjust_debug_stmts (orig_def
, PHI_RESULT (update_phi
),
233 gimple_bb (update_phi
));
236 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
237 that starts at zero, increases by one and its limit is NITERS.
239 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
242 slpeel_make_loop_iterate_ntimes (struct loop
*loop
, tree niters
)
244 tree indx_before_incr
, indx_after_incr
;
247 edge exit_edge
= single_exit (loop
);
248 gimple_stmt_iterator loop_cond_gsi
;
249 gimple_stmt_iterator incr_gsi
;
251 tree init
= build_int_cst (TREE_TYPE (niters
), 0);
252 tree step
= build_int_cst (TREE_TYPE (niters
), 1);
253 source_location loop_loc
;
256 orig_cond
= get_loop_exit_condition (loop
);
257 gcc_assert (orig_cond
);
258 loop_cond_gsi
= gsi_for_stmt (orig_cond
);
260 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
261 create_iv (init
, step
, NULL_TREE
, loop
,
262 &incr_gsi
, insert_after
, &indx_before_incr
, &indx_after_incr
);
264 indx_after_incr
= force_gimple_operand_gsi (&loop_cond_gsi
, indx_after_incr
,
265 true, NULL_TREE
, true,
267 niters
= force_gimple_operand_gsi (&loop_cond_gsi
, niters
, true, NULL_TREE
,
268 true, GSI_SAME_STMT
);
270 code
= (exit_edge
->flags
& EDGE_TRUE_VALUE
) ? GE_EXPR
: LT_EXPR
;
271 cond_stmt
= gimple_build_cond (code
, indx_after_incr
, niters
, NULL_TREE
,
274 gsi_insert_before (&loop_cond_gsi
, cond_stmt
, GSI_SAME_STMT
);
276 /* Remove old loop exit test: */
277 gsi_remove (&loop_cond_gsi
, true);
278 free_stmt_vec_info (orig_cond
);
280 loop_loc
= find_loop_location (loop
);
281 if (dump_enabled_p ())
283 if (LOCATION_LOCUS (loop_loc
) != UNKNOWN_LOCATION
)
284 dump_printf (MSG_NOTE
, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc
),
285 LOCATION_LINE (loop_loc
));
286 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, cond_stmt
, 0);
289 /* Record the number of latch iterations. */
290 loop
->nb_iterations
= fold_build2 (MINUS_EXPR
, TREE_TYPE (niters
), niters
,
291 build_int_cst (TREE_TYPE (niters
), 1));
294 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
295 For all PHI arguments in FROM->dest and TO->dest from those
296 edges ensure that TO->dest PHI arguments have current_def
300 slpeel_duplicate_current_defs_from_edges (edge from
, edge to
)
302 gimple_stmt_iterator gsi_from
, gsi_to
;
304 for (gsi_from
= gsi_start_phis (from
->dest
),
305 gsi_to
= gsi_start_phis (to
->dest
);
306 !gsi_end_p (gsi_from
) && !gsi_end_p (gsi_to
);)
308 gimple
*from_phi
= gsi_stmt (gsi_from
);
309 gimple
*to_phi
= gsi_stmt (gsi_to
);
310 tree from_arg
= PHI_ARG_DEF_FROM_EDGE (from_phi
, from
);
311 tree to_arg
= PHI_ARG_DEF_FROM_EDGE (to_phi
, to
);
312 if (virtual_operand_p (from_arg
))
314 gsi_next (&gsi_from
);
317 if (virtual_operand_p (to_arg
))
322 if (TREE_CODE (from_arg
) != SSA_NAME
)
323 gcc_assert (operand_equal_p (from_arg
, to_arg
, 0));
326 if (get_current_def (to_arg
) == NULL_TREE
)
327 set_current_def (to_arg
, get_current_def (from_arg
));
329 gsi_next (&gsi_from
);
333 gphi
*from_phi
= get_virtual_phi (from
->dest
);
334 gphi
*to_phi
= get_virtual_phi (to
->dest
);
336 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi
, to
),
337 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi
, from
)));
341 /* Given LOOP this function generates a new copy of it and puts it
342 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
343 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
344 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
345 entry or exit of LOOP. */
348 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop
*loop
,
349 struct loop
*scalar_loop
, edge e
)
351 struct loop
*new_loop
;
352 basic_block
*new_bbs
, *bbs
, *pbbs
;
355 basic_block exit_dest
;
357 bool duplicate_outer_loop
= false;
359 exit
= single_exit (loop
);
360 at_exit
= (e
== exit
);
361 if (!at_exit
&& e
!= loop_preheader_edge (loop
))
364 if (scalar_loop
== NULL
)
367 bbs
= XNEWVEC (basic_block
, scalar_loop
->num_nodes
+ 1);
369 get_loop_body_with_size (scalar_loop
, pbbs
, scalar_loop
->num_nodes
);
370 /* Allow duplication of outer loops. */
371 if (scalar_loop
->inner
)
372 duplicate_outer_loop
= true;
373 /* Check whether duplication is possible. */
374 if (!can_copy_bbs_p (pbbs
, scalar_loop
->num_nodes
))
380 /* Generate new loop structure. */
381 new_loop
= duplicate_loop (scalar_loop
, loop_outer (scalar_loop
));
382 duplicate_subloops (scalar_loop
, new_loop
);
384 exit_dest
= exit
->dest
;
385 was_imm_dom
= (get_immediate_dominator (CDI_DOMINATORS
,
386 exit_dest
) == loop
->header
?
389 /* Also copy the pre-header, this avoids jumping through hoops to
390 duplicate the loop entry PHI arguments. Create an empty
391 pre-header unconditionally for this. */
392 basic_block preheader
= split_edge (loop_preheader_edge (scalar_loop
));
393 edge entry_e
= single_pred_edge (preheader
);
395 new_bbs
= XNEWVEC (basic_block
, scalar_loop
->num_nodes
+ 1);
397 exit
= single_exit (scalar_loop
);
398 copy_bbs (bbs
, scalar_loop
->num_nodes
+ 1, new_bbs
,
399 &exit
, 1, &new_exit
, NULL
,
400 at_exit
? loop
->latch
: e
->src
, true);
401 exit
= single_exit (loop
);
402 basic_block new_preheader
= new_bbs
[0];
404 add_phi_args_after_copy (new_bbs
, scalar_loop
->num_nodes
+ 1, NULL
);
406 if (scalar_loop
!= loop
)
408 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
409 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
410 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
411 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
412 header) to have current_def set, so copy them over. */
413 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop
),
415 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop
->latch
,
417 EDGE_SUCC (loop
->latch
, 0));
420 if (at_exit
) /* Add the loop copy at exit. */
422 if (scalar_loop
!= loop
)
425 new_exit
= redirect_edge_and_branch (new_exit
, exit_dest
);
427 for (gsi
= gsi_start_phis (exit_dest
); !gsi_end_p (gsi
);
430 gphi
*phi
= gsi
.phi ();
431 tree orig_arg
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
432 location_t orig_locus
433 = gimple_phi_arg_location_from_edge (phi
, e
);
435 add_phi_arg (phi
, orig_arg
, new_exit
, orig_locus
);
438 redirect_edge_and_branch_force (e
, new_preheader
);
439 flush_pending_stmts (e
);
440 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
, e
->src
);
441 if (was_imm_dom
|| duplicate_outer_loop
)
442 set_immediate_dominator (CDI_DOMINATORS
, exit_dest
, new_exit
->src
);
444 /* And remove the non-necessary forwarder again. Keep the other
445 one so we have a proper pre-header for the loop at the exit edge. */
446 redirect_edge_pred (single_succ_edge (preheader
),
447 single_pred (preheader
));
448 delete_basic_block (preheader
);
449 set_immediate_dominator (CDI_DOMINATORS
, scalar_loop
->header
,
450 loop_preheader_edge (scalar_loop
)->src
);
452 else /* Add the copy at entry. */
454 if (scalar_loop
!= loop
)
456 /* Remove the non-necessary forwarder of scalar_loop again. */
457 redirect_edge_pred (single_succ_edge (preheader
),
458 single_pred (preheader
));
459 delete_basic_block (preheader
);
460 set_immediate_dominator (CDI_DOMINATORS
, scalar_loop
->header
,
461 loop_preheader_edge (scalar_loop
)->src
);
462 preheader
= split_edge (loop_preheader_edge (loop
));
463 entry_e
= single_pred_edge (preheader
);
466 redirect_edge_and_branch_force (entry_e
, new_preheader
);
467 flush_pending_stmts (entry_e
);
468 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
, entry_e
->src
);
470 redirect_edge_and_branch_force (new_exit
, preheader
);
471 flush_pending_stmts (new_exit
);
472 set_immediate_dominator (CDI_DOMINATORS
, preheader
, new_exit
->src
);
474 /* And remove the non-necessary forwarder again. Keep the other
475 one so we have a proper pre-header for the loop at the exit edge. */
476 redirect_edge_pred (single_succ_edge (new_preheader
),
477 single_pred (new_preheader
));
478 delete_basic_block (new_preheader
);
479 set_immediate_dominator (CDI_DOMINATORS
, new_loop
->header
,
480 loop_preheader_edge (new_loop
)->src
);
483 for (unsigned i
= 0; i
< scalar_loop
->num_nodes
+ 1; i
++)
484 rename_variables_in_bb (new_bbs
[i
], duplicate_outer_loop
);
486 if (scalar_loop
!= loop
)
488 /* Update new_loop->header PHIs, so that on the preheader
489 edge they are the ones from loop rather than scalar_loop. */
490 gphi_iterator gsi_orig
, gsi_new
;
491 edge orig_e
= loop_preheader_edge (loop
);
492 edge new_e
= loop_preheader_edge (new_loop
);
494 for (gsi_orig
= gsi_start_phis (loop
->header
),
495 gsi_new
= gsi_start_phis (new_loop
->header
);
496 !gsi_end_p (gsi_orig
) && !gsi_end_p (gsi_new
);
497 gsi_next (&gsi_orig
), gsi_next (&gsi_new
))
499 gphi
*orig_phi
= gsi_orig
.phi ();
500 gphi
*new_phi
= gsi_new
.phi ();
501 tree orig_arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, orig_e
);
502 location_t orig_locus
503 = gimple_phi_arg_location_from_edge (orig_phi
, orig_e
);
505 add_phi_arg (new_phi
, orig_arg
, new_e
, orig_locus
);
512 checking_verify_dominators (CDI_DOMINATORS
);
518 /* Given the condition expression COND, put it as the last statement of
519 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
520 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
521 skip the loop. PROBABILITY is the skip edge's probability. */
524 slpeel_add_loop_guard (basic_block guard_bb
, tree cond
,
525 basic_block guard_to
, basic_block dom_bb
,
528 gimple_stmt_iterator gsi
;
531 gimple_seq gimplify_stmt_list
= NULL
;
533 enter_e
= EDGE_SUCC (guard_bb
, 0);
534 enter_e
->flags
&= ~EDGE_FALLTHRU
;
535 enter_e
->flags
|= EDGE_FALSE_VALUE
;
536 gsi
= gsi_last_bb (guard_bb
);
538 cond
= force_gimple_operand_1 (cond
, &gimplify_stmt_list
, is_gimple_condexpr
,
540 if (gimplify_stmt_list
)
541 gsi_insert_seq_after (&gsi
, gimplify_stmt_list
, GSI_NEW_STMT
);
543 cond_stmt
= gimple_build_cond_from_tree (cond
, NULL_TREE
, NULL_TREE
);
544 gsi
= gsi_last_bb (guard_bb
);
545 gsi_insert_after (&gsi
, cond_stmt
, GSI_NEW_STMT
);
547 /* Add new edge to connect guard block to the merge/loop-exit block. */
548 new_e
= make_edge (guard_bb
, guard_to
, EDGE_TRUE_VALUE
);
550 new_e
->count
= guard_bb
->count
;
551 new_e
->probability
= probability
;
552 new_e
->count
= apply_probability (enter_e
->count
, probability
);
553 enter_e
->count
-= new_e
->count
;
554 enter_e
->probability
= inverse_probability (probability
);
555 set_immediate_dominator (CDI_DOMINATORS
, guard_to
, dom_bb
);
560 /* This function verifies that the following restrictions apply to LOOP:
561 (1) it consists of exactly 2 basic blocks - header, and an empty latch
562 for innermost loop and 5 basic blocks for outer-loop.
563 (2) it is single entry, single exit
564 (3) its exit condition is the last stmt in the header
565 (4) E is the entry/exit edge of LOOP.
569 slpeel_can_duplicate_loop_p (const struct loop
*loop
, const_edge e
)
571 edge exit_e
= single_exit (loop
);
572 edge entry_e
= loop_preheader_edge (loop
);
573 gcond
*orig_cond
= get_loop_exit_condition (loop
);
574 gimple_stmt_iterator loop_exit_gsi
= gsi_last_bb (exit_e
->src
);
575 unsigned int num_bb
= loop
->inner
? 5 : 2;
577 /* All loops have an outer scope; the only case loop->outer is NULL is for
578 the function itself. */
579 if (!loop_outer (loop
)
580 || loop
->num_nodes
!= num_bb
581 || !empty_block_p (loop
->latch
)
582 || !single_exit (loop
)
583 /* Verify that new loop exit condition can be trivially modified. */
584 || (!orig_cond
|| orig_cond
!= gsi_stmt (loop_exit_gsi
))
585 || (e
!= exit_e
&& e
!= entry_e
))
591 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
592 in the exit bb and rename all the uses after the loop. This simplifies
593 the *guard[12] routines, which assume loop closed SSA form for all PHIs
594 (but normally loop closed SSA form doesn't require virtual PHIs to be
595 in the same form). Doing this early simplifies the checking what
596 uses should be renamed. */
599 create_lcssa_for_virtual_phi (struct loop
*loop
)
602 edge exit_e
= single_exit (loop
);
604 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
605 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi
))))
607 gphi
*phi
= gsi
.phi ();
608 for (gsi
= gsi_start_phis (exit_e
->dest
);
609 !gsi_end_p (gsi
); gsi_next (&gsi
))
610 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi
))))
614 tree new_vop
= copy_ssa_name (PHI_RESULT (phi
));
615 gphi
*new_phi
= create_phi_node (new_vop
, exit_e
->dest
);
616 tree vop
= PHI_ARG_DEF_FROM_EDGE (phi
, EDGE_SUCC (loop
->latch
, 0));
617 imm_use_iterator imm_iter
;
621 add_phi_arg (new_phi
, vop
, exit_e
, UNKNOWN_LOCATION
);
622 gimple_phi_set_result (new_phi
, new_vop
);
623 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, vop
)
625 && !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
626 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
627 SET_USE (use_p
, new_vop
);
634 /* Function vect_get_loop_location.
636 Extract the location of the loop in the source code.
637 If the loop is not well formed for vectorization, an estimated
638 location is calculated.
639 Return the loop location if succeed and NULL if not. */
642 find_loop_location (struct loop
*loop
)
646 gimple_stmt_iterator si
;
649 return UNKNOWN_LOCATION
;
651 stmt
= get_loop_exit_condition (loop
);
654 && LOCATION_LOCUS (gimple_location (stmt
)) > BUILTINS_LOCATION
)
655 return gimple_location (stmt
);
657 /* If we got here the loop is probably not "well formed",
658 try to estimate the loop location */
661 return UNKNOWN_LOCATION
;
665 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
667 stmt
= gsi_stmt (si
);
668 if (LOCATION_LOCUS (gimple_location (stmt
)) > BUILTINS_LOCATION
)
669 return gimple_location (stmt
);
672 return UNKNOWN_LOCATION
;
675 /* Return true if PHI defines an IV of the loop to be vectorized. */
680 if (virtual_operand_p (PHI_RESULT (phi
)))
683 stmt_vec_info stmt_info
= vinfo_for_stmt (phi
);
684 gcc_assert (stmt_info
!= NULL
);
685 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
686 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_double_reduction_def
)
692 /* Function vect_can_advance_ivs_p
694 In case the number of iterations that LOOP iterates is unknown at compile
695 time, an epilog loop will be generated, and the loop induction variables
696 (IVs) will be "advanced" to the value they are supposed to take just before
697 the epilog loop. Here we check that the access function of the loop IVs
698 and the expression that represents the loop bound are simple enough.
699 These restrictions will be relaxed in the future. */
702 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
704 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
705 basic_block bb
= loop
->header
;
708 /* Analyze phi functions of the loop header. */
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_NOTE
, vect_location
, "vect_can_advance_ivs_p:\n");
712 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
716 gphi
*phi
= gsi
.phi ();
717 if (dump_enabled_p ())
719 dump_printf_loc (MSG_NOTE
, vect_location
, "Analyze phi: ");
720 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
723 /* Skip virtual phi's. The data dependences that are associated with
724 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
726 Skip reduction phis. */
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_NOTE
, vect_location
,
731 "reduc or virtual phi. skip.\n");
735 /* Analyze the evolution function. */
738 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi
));
739 if (evolution_part
== NULL_TREE
)
741 if (dump_enabled_p ())
742 dump_printf (MSG_MISSED_OPTIMIZATION
,
743 "No access function or evolution.\n");
747 /* FORNOW: We do not transform initial conditions of IVs
748 which evolution functions are not invariants in the loop. */
750 if (!expr_invariant_in_loop_p (loop
, evolution_part
))
752 if (dump_enabled_p ())
753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
754 "evolution not invariant in loop.\n");
758 /* FORNOW: We do not transform initial conditions of IVs
759 which evolution functions are a polynomial of degree >= 2. */
761 if (tree_is_chrec (evolution_part
))
763 if (dump_enabled_p ())
764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
765 "evolution is chrec.\n");
774 /* Function vect_update_ivs_after_vectorizer.
776 "Advance" the induction variables of LOOP to the value they should take
777 after the execution of LOOP. This is currently necessary because the
778 vectorizer does not handle induction variables that are used after the
779 loop. Such a situation occurs when the last iterations of LOOP are
781 1. We introduced new uses after LOOP for IVs that were not originally used
782 after LOOP: the IVs of LOOP are now used by an epilog loop.
783 2. LOOP is going to be vectorized; this means that it will iterate N/VF
784 times, whereas the loop IVs should be bumped N times.
787 - LOOP - a loop that is going to be vectorized. The last few iterations
789 - NITERS - the number of iterations that LOOP executes (before it is
790 vectorized). i.e, the number of times the ivs should be bumped.
791 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
792 coming out from LOOP on which there are uses of the LOOP ivs
793 (this is the path from LOOP->exit to epilog_loop->preheader).
795 The new definitions of the ivs are placed in LOOP->exit.
796 The phi args associated with the edge UPDATE_E in the bb
797 UPDATE_E->dest are updated accordingly.
799 Assumption 1: Like the rest of the vectorizer, this function assumes
800 a single loop exit that has a single predecessor.
802 Assumption 2: The phi nodes in the LOOP header and in update_bb are
803 organized in the same order.
805 Assumption 3: The access function of the ivs is simple enough (see
806 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
808 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
809 coming out of LOOP on which the ivs of LOOP are used (this is the path
810 that leads to the epilog loop; other paths skip the epilog loop). This
811 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
812 needs to have its phis updated.
816 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo
,
817 tree niters
, edge update_e
)
819 gphi_iterator gsi
, gsi1
;
820 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
821 basic_block update_bb
= update_e
->dest
;
822 basic_block exit_bb
= single_exit (loop
)->dest
;
824 /* Make sure there exists a single-predecessor exit bb: */
825 gcc_assert (single_pred_p (exit_bb
));
826 gcc_assert (single_succ_edge (exit_bb
) == update_e
);
828 for (gsi
= gsi_start_phis (loop
->header
), gsi1
= gsi_start_phis (update_bb
);
829 !gsi_end_p (gsi
) && !gsi_end_p (gsi1
);
830 gsi_next (&gsi
), gsi_next (&gsi1
))
835 tree var
, ni
, ni_name
;
836 gimple_stmt_iterator last_gsi
;
838 gphi
*phi
= gsi
.phi ();
839 gphi
*phi1
= gsi1
.phi ();
840 if (dump_enabled_p ())
842 dump_printf_loc (MSG_NOTE
, vect_location
,
843 "vect_update_ivs_after_vectorizer: phi: ");
844 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
847 /* Skip reduction and virtual phis. */
850 if (dump_enabled_p ())
851 dump_printf_loc (MSG_NOTE
, vect_location
,
852 "reduc or virtual phi. skip.\n");
856 type
= TREE_TYPE (gimple_phi_result (phi
));
857 step_expr
= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi
));
858 step_expr
= unshare_expr (step_expr
);
860 /* FORNOW: We do not support IVs whose evolution function is a polynomial
861 of degree >= 2 or exponential. */
862 gcc_assert (!tree_is_chrec (step_expr
));
864 init_expr
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
866 off
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
867 fold_convert (TREE_TYPE (step_expr
), niters
),
869 if (POINTER_TYPE_P (type
))
870 ni
= fold_build_pointer_plus (init_expr
, off
);
872 ni
= fold_build2 (PLUS_EXPR
, type
,
873 init_expr
, fold_convert (type
, off
));
875 var
= create_tmp_var (type
, "tmp");
877 last_gsi
= gsi_last_bb (exit_bb
);
878 gimple_seq new_stmts
= NULL
;
879 ni_name
= force_gimple_operand (ni
, &new_stmts
, false, var
);
880 /* Exit_bb shouldn't be empty. */
881 if (!gsi_end_p (last_gsi
))
882 gsi_insert_seq_after (&last_gsi
, new_stmts
, GSI_SAME_STMT
);
884 gsi_insert_seq_before (&last_gsi
, new_stmts
, GSI_SAME_STMT
);
886 /* Fix phi expressions in the successor bb. */
887 adjust_phi_and_debug_stmts (phi1
, update_e
, ni_name
);
891 /* Function vect_gen_prolog_loop_niters
893 Generate the number of iterations which should be peeled as prolog for the
894 loop represented by LOOP_VINFO. It is calculated as the misalignment of
895 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
896 As a result, after the execution of this loop, the data reference DR will
897 refer to an aligned location. The following computation is generated:
899 If the misalignment of DR is known at compile time:
900 addr_mis = int mis = DR_MISALIGNMENT (dr);
901 Else, compute address misalignment in bytes:
902 addr_mis = addr & (vectype_align - 1)
904 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
906 (elem_size = element type size; an element is the scalar element whose type
907 is the inner type of the vectype)
909 The computations will be emitted at the end of BB. We also compute and
910 store upper bound (included) of the result in BOUND.
912 When the step of the data-ref in the loop is not 1 (as in interleaved data
913 and SLP), the number of iterations of the prolog must be divided by the step
914 (which is equal to the size of interleaved group).
916 The above formulas assume that VF == number of elements in the vector. This
917 may not hold when there are multiple-types in the loop.
918 In this case, for some data-references in the loop the VF does not represent
919 the number of elements that fit in the vector. Therefore, instead of VF we
920 use TYPE_VECTOR_SUBPARTS. */
923 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo
,
924 basic_block bb
, int *bound
)
926 struct data_reference
*dr
= LOOP_VINFO_UNALIGNED_DR (loop_vinfo
);
927 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
929 tree niters_type
= TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
));
930 gimple_seq stmts
= NULL
, new_stmts
= NULL
;
931 tree iters
, iters_name
;
932 gimple
*dr_stmt
= DR_STMT (dr
);
933 stmt_vec_info stmt_info
= vinfo_for_stmt (dr_stmt
);
934 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
935 int vectype_align
= TYPE_ALIGN (vectype
) / BITS_PER_UNIT
;
936 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
938 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
) > 0)
940 int npeel
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
942 if (dump_enabled_p ())
943 dump_printf_loc (MSG_NOTE
, vect_location
,
944 "known peeling = %d.\n", npeel
);
946 iters
= build_int_cst (niters_type
, npeel
);
947 *bound
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
951 bool negative
= tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0;
952 tree offset
= negative
953 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1) : size_zero_node
;
954 tree start_addr
= vect_create_addr_base_for_vector_ref (dr_stmt
,
955 &stmts
, offset
, loop
);
956 tree type
= unsigned_type_for (TREE_TYPE (start_addr
));
957 tree vectype_align_minus_1
= build_int_cst (type
, vectype_align
- 1);
958 HOST_WIDE_INT elem_size
=
959 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
960 tree elem_size_log
= build_int_cst (type
, exact_log2 (elem_size
));
961 tree nelements_minus_1
= build_int_cst (type
, nelements
- 1);
962 tree nelements_tree
= build_int_cst (type
, nelements
);
966 /* Create: byte_misalign = addr & (vectype_align - 1) */
968 fold_build2 (BIT_AND_EXPR
, type
, fold_convert (type
, start_addr
),
969 vectype_align_minus_1
);
971 /* Create: elem_misalign = byte_misalign / element_size */
973 fold_build2 (RSHIFT_EXPR
, type
, byte_misalign
, elem_size_log
);
975 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
977 iters
= fold_build2 (MINUS_EXPR
, type
, elem_misalign
, nelements_tree
);
979 iters
= fold_build2 (MINUS_EXPR
, type
, nelements_tree
, elem_misalign
);
980 iters
= fold_build2 (BIT_AND_EXPR
, type
, iters
, nelements_minus_1
);
981 iters
= fold_convert (niters_type
, iters
);
982 *bound
= nelements
- 1;
985 if (dump_enabled_p ())
987 dump_printf_loc (MSG_NOTE
, vect_location
,
988 "niters for prolog loop: ");
989 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, iters
);
990 dump_printf (MSG_NOTE
, "\n");
993 var
= create_tmp_var (niters_type
, "prolog_loop_niters");
994 iters_name
= force_gimple_operand (iters
, &new_stmts
, false, var
);
997 gimple_seq_add_seq (&stmts
, new_stmts
);
1000 gcc_assert (single_succ_p (bb
));
1001 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
1002 if (gsi_end_p (gsi
))
1003 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
1005 gsi_insert_seq_after (&gsi
, stmts
, GSI_SAME_STMT
);
1011 /* Function vect_update_init_of_dr
1013 NITERS iterations were peeled from LOOP. DR represents a data reference
1014 in LOOP. This function updates the information recorded in DR to
1015 account for the fact that the first NITERS iterations had already been
1016 executed. Specifically, it updates the OFFSET field of DR. */
1019 vect_update_init_of_dr (struct data_reference
*dr
, tree niters
)
1021 tree offset
= DR_OFFSET (dr
);
1023 niters
= fold_build2 (MULT_EXPR
, sizetype
,
1024 fold_convert (sizetype
, niters
),
1025 fold_convert (sizetype
, DR_STEP (dr
)));
1026 offset
= fold_build2 (PLUS_EXPR
, sizetype
,
1027 fold_convert (sizetype
, offset
), niters
);
1028 DR_OFFSET (dr
) = offset
;
1032 /* Function vect_update_inits_of_drs
1034 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1035 This function updates the information recorded for the data references in
1036 the loop to account for the fact that the first NITERS iterations had
1037 already been executed. Specifically, it updates the initial_condition of
1038 the access_function of all the data_references in the loop. */
1041 vect_update_inits_of_drs (loop_vec_info loop_vinfo
, tree niters
)
1044 vec
<data_reference_p
> datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1045 struct data_reference
*dr
;
1047 if (dump_enabled_p ())
1048 dump_printf_loc (MSG_NOTE
, vect_location
,
1049 "=== vect_update_inits_of_dr ===\n");
1051 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1052 if (!types_compatible_p (sizetype
, TREE_TYPE (niters
)))
1055 edge pe
= loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo
));
1056 tree var
= create_tmp_var (sizetype
, "prolog_loop_adjusted_niters");
1058 niters
= fold_convert (sizetype
, niters
);
1059 niters
= force_gimple_operand (niters
, &seq
, false, var
);
1062 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1063 gcc_assert (!new_bb
);
1067 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1068 vect_update_init_of_dr (dr
, niters
);
1072 /* This function builds ni_name = number of iterations. Statements
1073 are emitted on the loop preheader edge. */
1076 vect_build_loop_niters (loop_vec_info loop_vinfo
)
1078 tree ni
= unshare_expr (LOOP_VINFO_NITERS (loop_vinfo
));
1079 if (TREE_CODE (ni
) == INTEGER_CST
)
1084 gimple_seq stmts
= NULL
;
1085 edge pe
= loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo
));
1087 var
= create_tmp_var (TREE_TYPE (ni
), "niters");
1088 ni_name
= force_gimple_operand (ni
, &stmts
, false, var
);
1090 gsi_insert_seq_on_edge_immediate (pe
, stmts
);
1096 /* Calculate the number of iterations above which vectorized loop will be
1097 preferred than scalar loop. NITERS_PROLOG is the number of iterations
1098 of prolog loop. If it's integer const, the integer number is also passed
1099 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1100 number of iterations of prolog loop. VFM1 is vector factor minus one.
1101 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1102 (rather than vectorized) loop will be executed. This function stores
1103 upper bound (included) of the result in BOUND_SCALAR. */
1106 vect_gen_scalar_loop_niters (tree niters_prolog
, int int_niters_prolog
,
1107 int bound_prolog
, int vfm1
, int th
,
1108 int *bound_scalar
, bool check_profitability
)
1110 tree type
= TREE_TYPE (niters_prolog
);
1111 tree niters
= fold_build2 (PLUS_EXPR
, type
, niters_prolog
,
1112 build_int_cst (type
, vfm1
));
1114 *bound_scalar
= vfm1
+ bound_prolog
;
1115 if (check_profitability
)
1117 /* TH indicates the minimum niters of vectorized loop, while we
1118 compute the maximum niters of scalar loop. */
1120 /* Peeling for constant times. */
1121 if (int_niters_prolog
>= 0)
1123 *bound_scalar
= (int_niters_prolog
+ vfm1
< th
1125 : vfm1
+ int_niters_prolog
);
1126 return build_int_cst (type
, *bound_scalar
);
1128 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1129 bound (inlcuded) of niters of prolog loop. */
1130 if (th
>= vfm1
+ bound_prolog
)
1133 return build_int_cst (type
, th
);
1135 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1137 return fold_build2 (MAX_EXPR
, type
, build_int_cst (type
, th
), niters
);
1142 /* This function generates the following statements:
1144 niters = number of iterations loop executes (after peeling)
1145 niters_vector = niters / vf
1147 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1148 true if NITERS doesn't overflow. */
1151 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo
, tree niters
,
1152 tree
*niters_vector_ptr
, bool niters_no_overflow
)
1154 tree ni_minus_gap
, var
;
1156 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1157 edge pe
= loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo
));
1158 tree log_vf
= build_int_cst (TREE_TYPE (niters
), exact_log2 (vf
));
1160 /* If epilogue loop is required because of data accesses with gaps, we
1161 subtract one iteration from the total number of iterations here for
1162 correct calculation of RATIO. */
1163 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
))
1165 ni_minus_gap
= fold_build2 (MINUS_EXPR
, TREE_TYPE (niters
),
1167 build_one_cst (TREE_TYPE (niters
)));
1168 if (!is_gimple_val (ni_minus_gap
))
1170 var
= create_tmp_var (TREE_TYPE (niters
), "ni_gap");
1171 gimple
*stmts
= NULL
;
1172 ni_minus_gap
= force_gimple_operand (ni_minus_gap
, &stmts
,
1174 gsi_insert_seq_on_edge_immediate (pe
, stmts
);
1178 ni_minus_gap
= niters
;
1180 /* Create: niters >> log2(vf) */
1181 /* If it's known that niters == number of latch executions + 1 doesn't
1182 overflow, we can generate niters >> log2(vf); otherwise we generate
1183 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1184 will be at least one. */
1185 if (niters_no_overflow
)
1186 niters_vector
= fold_build2 (RSHIFT_EXPR
, TREE_TYPE (niters
),
1187 ni_minus_gap
, log_vf
);
1190 = fold_build2 (PLUS_EXPR
, TREE_TYPE (niters
),
1191 fold_build2 (RSHIFT_EXPR
, TREE_TYPE (niters
),
1192 fold_build2 (MINUS_EXPR
, TREE_TYPE (niters
),
1195 (TREE_TYPE (niters
), vf
)),
1197 build_int_cst (TREE_TYPE (niters
), 1));
1199 if (!is_gimple_val (niters_vector
))
1201 var
= create_tmp_var (TREE_TYPE (niters
), "bnd");
1202 gimple
*stmts
= NULL
;
1203 niters_vector
= force_gimple_operand (niters_vector
, &stmts
, true, var
);
1204 gsi_insert_seq_on_edge_immediate (pe
, stmts
);
1206 *niters_vector_ptr
= niters_vector
;
1211 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1212 loop specified by LOOP_VINFO after vectorization, compute the number
1213 of iterations before vectorization (niters_vector * vf) and store it
1214 to NITERS_VECTOR_MULT_VF_PTR. */
1217 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo
,
1219 tree
*niters_vector_mult_vf_ptr
)
1221 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1222 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1223 tree type
= TREE_TYPE (niters_vector
);
1224 tree log_vf
= build_int_cst (type
, exact_log2 (vf
));
1225 basic_block exit_bb
= single_exit (loop
)->dest
;
1227 gcc_assert (niters_vector_mult_vf_ptr
!= NULL
);
1228 tree niters_vector_mult_vf
= fold_build2 (LSHIFT_EXPR
, type
,
1229 niters_vector
, log_vf
);
1230 if (!is_gimple_val (niters_vector_mult_vf
))
1232 tree var
= create_tmp_var (type
, "niters_vector_mult_vf");
1233 gimple_seq stmts
= NULL
;
1234 niters_vector_mult_vf
= force_gimple_operand (niters_vector_mult_vf
,
1236 gimple_stmt_iterator gsi
= gsi_start_bb (exit_bb
);
1237 gsi_insert_seq_before (&gsi
, stmts
, GSI_SAME_STMT
);
1239 *niters_vector_mult_vf_ptr
= niters_vector_mult_vf
;
1242 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
1243 from SECOND/FIRST and puts it at the original loop's preheader/exit
1244 edge, the two loops are arranged as below:
1249 i_1 = PHI<i_0, i_2>;
1260 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1264 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1265 or with i_2 if no LCSSA phi is created
1266 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1278 This function creates loop closed SSA for the first loop; update the
1279 second loop's PHI nodes by replacing argument on incoming edge with the
1280 result of newly created lcssa PHI nodes. IF CREATE_LCSSA_FOR_IV_PHIS
1281 is false, Loop closed ssa phis will only be created for non-iv phis for
1284 This function assumes exit bb of the first loop is preheader bb of the
1285 second loop, i.e, between_bb in the example code. With PHIs updated,
1286 the second loop will execute rest iterations of the first. */
1289 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo
,
1290 struct loop
*first
, struct loop
*second
,
1291 bool create_lcssa_for_iv_phis
)
1293 gphi_iterator gsi_update
, gsi_orig
;
1294 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1296 edge first_latch_e
= EDGE_SUCC (first
->latch
, 0);
1297 edge second_preheader_e
= loop_preheader_edge (second
);
1298 basic_block between_bb
= single_exit (first
)->dest
;
1300 gcc_assert (between_bb
== second_preheader_e
->src
);
1301 gcc_assert (single_pred_p (between_bb
) && single_succ_p (between_bb
));
1302 /* Either the first loop or the second is the loop to be vectorized. */
1303 gcc_assert (loop
== first
|| loop
== second
);
1305 for (gsi_orig
= gsi_start_phis (first
->header
),
1306 gsi_update
= gsi_start_phis (second
->header
);
1307 !gsi_end_p (gsi_orig
) && !gsi_end_p (gsi_update
);
1308 gsi_next (&gsi_orig
), gsi_next (&gsi_update
))
1310 gphi
*orig_phi
= gsi_orig
.phi ();
1311 gphi
*update_phi
= gsi_update
.phi ();
1313 tree arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, first_latch_e
);
1314 /* Generate lcssa PHI node for the first loop. */
1315 gphi
*vect_phi
= (loop
== first
) ? orig_phi
: update_phi
;
1316 if (create_lcssa_for_iv_phis
|| !iv_phi_p (vect_phi
))
1318 tree new_res
= copy_ssa_name (PHI_RESULT (orig_phi
));
1319 gphi
*lcssa_phi
= create_phi_node (new_res
, between_bb
);
1320 add_phi_arg (lcssa_phi
, arg
, single_exit (first
), UNKNOWN_LOCATION
);
1324 /* Update PHI node in the second loop by replacing arg on the loop's
1326 adjust_phi_and_debug_stmts (update_phi
, second_preheader_e
, arg
);
1330 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1331 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1332 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1343 i_1 = PHI<i_0, i_2>;
1357 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1361 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1373 This function creates PHI nodes at merge_bb and replaces the use of i_5
1374 in the update_loop's PHI node with the result of new PHI result. */
1377 slpeel_update_phi_nodes_for_guard1 (struct loop
*skip_loop
,
1378 struct loop
*update_loop
,
1379 edge guard_edge
, edge merge_edge
)
1381 source_location merge_loc
, guard_loc
;
1382 edge orig_e
= loop_preheader_edge (skip_loop
);
1383 edge update_e
= loop_preheader_edge (update_loop
);
1384 gphi_iterator gsi_orig
, gsi_update
;
1386 for ((gsi_orig
= gsi_start_phis (skip_loop
->header
),
1387 gsi_update
= gsi_start_phis (update_loop
->header
));
1388 !gsi_end_p (gsi_orig
) && !gsi_end_p (gsi_update
);
1389 gsi_next (&gsi_orig
), gsi_next (&gsi_update
))
1391 gphi
*orig_phi
= gsi_orig
.phi ();
1392 gphi
*update_phi
= gsi_update
.phi ();
1394 /* Generate new phi node at merge bb of the guard. */
1395 tree new_res
= copy_ssa_name (PHI_RESULT (orig_phi
));
1396 gphi
*new_phi
= create_phi_node (new_res
, guard_edge
->dest
);
1398 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1399 args in NEW_PHI for these edges. */
1400 tree merge_arg
= PHI_ARG_DEF_FROM_EDGE (update_phi
, update_e
);
1401 tree guard_arg
= PHI_ARG_DEF_FROM_EDGE (orig_phi
, orig_e
);
1402 merge_loc
= gimple_phi_arg_location_from_edge (update_phi
, update_e
);
1403 guard_loc
= gimple_phi_arg_location_from_edge (orig_phi
, orig_e
);
1404 add_phi_arg (new_phi
, merge_arg
, merge_edge
, merge_loc
);
1405 add_phi_arg (new_phi
, guard_arg
, guard_edge
, guard_loc
);
1407 /* Update phi in UPDATE_PHI. */
1408 adjust_phi_and_debug_stmts (update_phi
, update_e
, new_res
);
1412 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1413 this function searches for the corresponding lcssa phi node in exit
1414 bb of LOOP. If it is found, return the phi result; otherwise return
1418 find_guard_arg (struct loop
*loop
, struct loop
*epilog ATTRIBUTE_UNUSED
,
1422 edge e
= single_exit (loop
);
1424 gcc_assert (single_pred_p (e
->dest
));
1425 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1427 gphi
*phi
= gsi
.phi ();
1428 if (operand_equal_p (PHI_ARG_DEF (phi
, 0),
1429 PHI_ARG_DEF (lcssa_phi
, 0), 0))
1430 return PHI_RESULT (phi
);
1435 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1436 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1437 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1438 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1443 i_1 = PHI<i_0, i_2>;
1465 i_3 = PHI<i_2, i_4>;
1476 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1479 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1481 For each name used out side EPILOG (i.e - for each name that has a lcssa
1482 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1483 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1484 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1485 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1486 in exit_bb will also be updated. */
1489 slpeel_update_phi_nodes_for_guard2 (struct loop
*loop
, struct loop
*epilog
,
1490 edge guard_edge
, edge merge_edge
)
1493 basic_block merge_bb
= guard_edge
->dest
;
1495 gcc_assert (single_succ_p (merge_bb
));
1496 edge e
= single_succ_edge (merge_bb
);
1497 basic_block exit_bb
= e
->dest
;
1498 gcc_assert (single_pred_p (exit_bb
));
1499 gcc_assert (single_pred (exit_bb
) == single_exit (epilog
)->dest
);
1501 for (gsi
= gsi_start_phis (exit_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1503 gphi
*update_phi
= gsi
.phi ();
1504 tree old_arg
= PHI_ARG_DEF (update_phi
, 0);
1505 /* This loop-closed-phi actually doesn't represent a use out of the
1506 loop - the phi arg is a constant. */
1507 if (TREE_CODE (old_arg
) != SSA_NAME
)
1510 tree merge_arg
= get_current_def (old_arg
);
1512 merge_arg
= old_arg
;
1514 tree guard_arg
= find_guard_arg (loop
, epilog
, update_phi
);
1515 /* If the var is live after loop but not a reduction, we simply
1518 guard_arg
= old_arg
;
1520 /* Create new phi node in MERGE_BB: */
1521 tree new_res
= copy_ssa_name (PHI_RESULT (update_phi
));
1522 gphi
*merge_phi
= create_phi_node (new_res
, merge_bb
);
1524 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1525 the two PHI args in merge_phi for these edges. */
1526 add_phi_arg (merge_phi
, merge_arg
, merge_edge
, UNKNOWN_LOCATION
);
1527 add_phi_arg (merge_phi
, guard_arg
, guard_edge
, UNKNOWN_LOCATION
);
1529 /* Update the original phi in exit_bb. */
1530 adjust_phi_and_debug_stmts (update_phi
, e
, new_res
);
1534 /* EPILOG loop is duplicated from the original loop for vectorizing,
1535 the arg of its loop closed ssa PHI needs to be updated. */
1538 slpeel_update_phi_nodes_for_lcssa (struct loop
*epilog
)
1541 basic_block exit_bb
= single_exit (epilog
)->dest
;
1543 gcc_assert (single_pred_p (exit_bb
));
1544 edge e
= EDGE_PRED (exit_bb
, 0);
1545 for (gsi
= gsi_start_phis (exit_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1546 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi
.phi (), e
));
1549 /* Function vect_do_peeling.
1552 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1558 if (exit_loop_cond) goto exit_bb
1562 - NITERS: The number of iterations of the loop.
1563 - NITERSM1: The number of iterations of the loop's latch.
1564 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1565 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1566 CHECK_PROFITABILITY is true.
1568 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1570 This function peels prolog and epilog from the loop, adds guards skipping
1571 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1575 if (prefer_scalar_loop) goto merge_bb_1
1576 else goto guard_bb_2
1579 if (skip_prolog) goto merge_bb_2
1580 else goto prolog_preheader
1586 if (exit_prolog_cond) goto prolog_exit_bb
1587 else goto prolog_header_bb
1596 if (exit_vector_cond) goto vector_exit_bb
1597 else goto vector_header_bb
1601 if (skip_epilog) goto merge_bb_3
1602 else goto epilog_preheader
1610 if (exit_epilog_cond) goto merge_bb_3
1611 else goto epilog_header_bb
1615 Note this function peels prolog and epilog only if it's necessary,
1617 Returns created epilogue or NULL.
1619 TODO: Guard for prefer_scalar_loop should be emitted along with
1620 versioning conditions if loop versioning is needed. */
1624 vect_do_peeling (loop_vec_info loop_vinfo
, tree niters
, tree nitersm1
,
1625 tree
*niters_vector
, int th
, bool check_profitability
,
1626 bool niters_no_overflow
)
1629 tree type
= TREE_TYPE (niters
), guard_cond
;
1630 basic_block guard_bb
, guard_to
;
1631 int prob_prolog
, prob_vector
, prob_epilog
;
1632 int bound_prolog
= 0, bound_scalar
= 0, bound
= 0;
1633 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1634 int prolog_peeling
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
1635 bool epilog_peeling
= (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo
)
1636 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
));
1638 if (!prolog_peeling
&& !epilog_peeling
)
1641 prob_vector
= 9 * REG_BR_PROB_BASE
/ 10;
1642 if ((vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
)) == 2)
1644 prob_prolog
= prob_epilog
= (vf
- 1) * REG_BR_PROB_BASE
/ vf
;
1645 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1647 struct loop
*prolog
, *epilog
= NULL
, *loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1648 struct loop
*first_loop
= loop
;
1649 create_lcssa_for_virtual_phi (loop
);
1650 update_ssa (TODO_update_ssa_only_virtuals
);
1652 if (MAY_HAVE_DEBUG_STMTS
)
1654 gcc_assert (!adjust_vec
.exists ());
1655 adjust_vec
.create (32);
1657 initialize_original_copy_tables ();
1659 /* Prolog loop may be skipped. */
1660 bool skip_prolog
= (prolog_peeling
!= 0);
1661 /* Skip to epilog if scalar loop may be preferred. It's only used when
1662 we peel for epilog loop. */
1663 bool skip_vector
= (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
));
1664 /* Epilog loop must be executed if the number of iterations for epilog
1665 loop is known at compile time, otherwise we need to add a check at
1666 the end of vector loop and skip to the end of epilog loop. */
1667 bool skip_epilog
= (prolog_peeling
< 0
1668 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
));
1669 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1670 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
))
1671 skip_epilog
= false;
1673 /* Record the anchor bb at which guard should be placed if scalar loop
1674 may be preferred. */
1675 basic_block anchor
= loop_preheader_edge (loop
)->src
;
1677 split_edge (loop_preheader_edge (loop
));
1679 tree niters_prolog
= build_int_cst (type
, 0);
1680 source_location loop_loc
= find_loop_location (loop
);
1681 struct loop
*scalar_loop
= LOOP_VINFO_SCALAR_LOOP (loop_vinfo
);
1684 e
= loop_preheader_edge (loop
);
1685 if (!slpeel_can_duplicate_loop_p (loop
, e
))
1687 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1688 "loop can't be duplicated to preheader edge.\n");
1691 /* Peel prolog and put it on preheader edge of loop. */
1692 prolog
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, scalar_loop
, e
);
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1696 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1699 slpeel_update_phi_nodes_for_loops (loop_vinfo
, prolog
, loop
, true);
1700 first_loop
= prolog
;
1701 reset_original_copy_tables ();
1703 /* Generate and update the number of iterations for prolog loop. */
1704 niters_prolog
= vect_gen_prolog_loop_niters (loop_vinfo
, anchor
,
1706 slpeel_make_loop_iterate_ntimes (prolog
, niters_prolog
);
1708 /* Skip the prolog loop. */
1711 guard_cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1712 niters_prolog
, build_int_cst (type
, 0));
1713 guard_bb
= loop_preheader_edge (prolog
)->src
;
1714 guard_to
= split_edge (loop_preheader_edge (loop
));
1715 guard_e
= slpeel_add_loop_guard (guard_bb
, guard_cond
,
1717 inverse_probability (prob_prolog
));
1718 e
= EDGE_PRED (guard_to
, 0);
1719 e
= (e
!= guard_e
? e
: EDGE_PRED (guard_to
, 1));
1720 slpeel_update_phi_nodes_for_guard1 (prolog
, loop
, guard_e
, e
);
1721 scale_loop_profile (prolog
, prob_prolog
, bound_prolog
);
1723 /* Update init address of DRs. */
1724 vect_update_inits_of_drs (loop_vinfo
, niters_prolog
);
1725 /* Update niters for vector loop. */
1726 LOOP_VINFO_NITERS (loop_vinfo
)
1727 = fold_build2 (MINUS_EXPR
, type
, niters
, niters_prolog
);
1728 LOOP_VINFO_NITERSM1 (loop_vinfo
)
1729 = fold_build2 (MINUS_EXPR
, type
,
1730 LOOP_VINFO_NITERSM1 (loop_vinfo
), niters_prolog
);
1731 niters
= vect_build_loop_niters (loop_vinfo
);
1733 /* Prolog iterates at most bound_prolog times, latch iterates at
1734 most bound_prolog - 1 times. */
1735 record_niter_bound (prolog
, bound_prolog
- 1, false, true);
1736 delete_update_ssa ();
1737 adjust_vec_debug_stmts ();
1743 e
= single_exit (loop
);
1744 if (!slpeel_can_duplicate_loop_p (loop
, e
))
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1747 "loop can't be duplicated to exit edge.\n");
1750 /* Peel epilog and put it on exit edge of loop. */
1751 epilog
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, scalar_loop
, e
);
1754 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1755 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1758 slpeel_update_phi_nodes_for_loops (loop_vinfo
, loop
, epilog
, false);
1760 /* Scalar version loop may be preferred. In this case, add guard
1761 and skip to epilog. Note this only happens when the number of
1762 iterations of loop is unknown at compile time, otherwise this
1763 won't be vectorized. */
1766 /* Additional epilogue iteration is peeled if gap exists. */
1767 bool peel_for_gaps
= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
);
1768 tree t
= vect_gen_scalar_loop_niters (niters_prolog
, prolog_peeling
,
1770 peel_for_gaps
? vf
: vf
- 1,
1772 check_profitability
);
1773 /* Build guard against NITERSM1 since NITERS may overflow. */
1774 guard_cond
= fold_build2 (LT_EXPR
, boolean_type_node
, nitersm1
, t
);
1776 guard_to
= split_edge (loop_preheader_edge (epilog
));
1777 guard_e
= slpeel_add_loop_guard (guard_bb
, guard_cond
,
1779 inverse_probability (prob_vector
));
1780 e
= EDGE_PRED (guard_to
, 0);
1781 e
= (e
!= guard_e
? e
: EDGE_PRED (guard_to
, 1));
1782 slpeel_update_phi_nodes_for_guard1 (first_loop
, epilog
, guard_e
, e
);
1783 scale_loop_profile (epilog
, prob_vector
, bound_scalar
);
1786 tree niters_vector_mult_vf
;
1787 /* If loop is peeled for non-zero constant times, now niters refers to
1788 orig_niters - prolog_peeling, it won't overflow even the orig_niters
1790 niters_no_overflow
|= (prolog_peeling
> 0);
1791 vect_gen_vector_loop_niters (loop_vinfo
, niters
,
1792 niters_vector
, niters_no_overflow
);
1793 vect_gen_vector_loop_niters_mult_vf (loop_vinfo
, *niters_vector
,
1794 &niters_vector_mult_vf
);
1795 /* Update IVs of original loop as if they were advanced by
1796 niters_vector_mult_vf steps. */
1797 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo
));
1798 edge update_e
= skip_vector
? e
: loop_preheader_edge (epilog
);
1799 vect_update_ivs_after_vectorizer (loop_vinfo
, niters_vector_mult_vf
,
1804 guard_cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1805 niters
, niters_vector_mult_vf
);
1806 guard_bb
= single_exit (loop
)->dest
;
1807 guard_to
= split_edge (single_exit (epilog
));
1808 guard_e
= slpeel_add_loop_guard (guard_bb
, guard_cond
, guard_to
,
1809 skip_vector
? anchor
: guard_bb
,
1810 inverse_probability (prob_epilog
));
1811 slpeel_update_phi_nodes_for_guard2 (loop
, epilog
, guard_e
,
1812 single_exit (epilog
));
1813 scale_loop_profile (epilog
, prob_epilog
, bound
);
1816 slpeel_update_phi_nodes_for_lcssa (epilog
);
1818 bound
= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) ? vf
- 1 : vf
- 2;
1819 /* We share epilog loop with scalar version loop. */
1820 bound
= MAX (bound
, bound_scalar
- 1);
1821 record_niter_bound (epilog
, bound
, false, true);
1823 delete_update_ssa ();
1824 adjust_vec_debug_stmts ();
1827 adjust_vec
.release ();
1828 free_original_copy_tables ();
1833 /* Function vect_create_cond_for_niters_checks.
1835 Create a conditional expression that represents the run-time checks for
1836 loop's niter. The loop is guaranteed to to terminate if the run-time
1840 COND_EXPR - input conditional expression. New conditions will be chained
1841 with logical AND operation. If it is NULL, then the function
1842 is used to return the number of alias checks.
1843 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
1847 COND_EXPR - conditional expression.
1849 The returned COND_EXPR is the conditional expression to be used in the
1850 if statement that controls which version of the loop gets executed at
1854 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo
, tree
*cond_expr
)
1856 tree part_cond_expr
= LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo
);
1859 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1860 *cond_expr
, part_cond_expr
);
1862 *cond_expr
= part_cond_expr
;
1865 /* Function vect_create_cond_for_align_checks.
1867 Create a conditional expression that represents the alignment checks for
1868 all of data references (array element references) whose alignment must be
1872 COND_EXPR - input conditional expression. New conditions will be chained
1873 with logical AND operation.
1874 LOOP_VINFO - two fields of the loop information are used.
1875 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1876 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1879 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1881 The returned value is the conditional expression to be used in the if
1882 statement that controls which version of the loop gets executed at runtime.
1884 The algorithm makes two assumptions:
1885 1) The number of bytes "n" in a vector is a power of 2.
1886 2) An address "a" is aligned if a%n is zero and that this
1887 test can be done as a&(n-1) == 0. For example, for 16
1888 byte vectors the test is a&0xf == 0. */
1891 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo
,
1893 gimple_seq
*cond_expr_stmt_list
)
1895 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1896 vec
<gimple
*> may_misalign_stmts
1897 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1899 int mask
= LOOP_VINFO_PTR_MASK (loop_vinfo
);
1902 tree int_ptrsize_type
;
1904 tree or_tmp_name
= NULL_TREE
;
1908 tree part_cond_expr
;
1910 /* Check that mask is one less than a power of 2, i.e., mask is
1911 all zeros followed by all ones. */
1912 gcc_assert ((mask
!= 0) && ((mask
& (mask
+1)) == 0));
1914 int_ptrsize_type
= signed_type_for (ptr_type_node
);
1916 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
1917 of the first vector of the i'th data reference. */
1919 FOR_EACH_VEC_ELT (may_misalign_stmts
, i
, ref_stmt
)
1921 gimple_seq new_stmt_list
= NULL
;
1924 tree new_or_tmp_name
;
1925 gimple
*addr_stmt
, *or_stmt
;
1926 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (ref_stmt
);
1927 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1928 bool negative
= tree_int_cst_compare
1929 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo
)), size_zero_node
) < 0;
1930 tree offset
= negative
1931 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1) : size_zero_node
;
1933 /* create: addr_tmp = (int)(address_of_first_vector) */
1935 vect_create_addr_base_for_vector_ref (ref_stmt
, &new_stmt_list
,
1937 if (new_stmt_list
!= NULL
)
1938 gimple_seq_add_seq (cond_expr_stmt_list
, new_stmt_list
);
1940 sprintf (tmp_name
, "addr2int%d", i
);
1941 addr_tmp_name
= make_temp_ssa_name (int_ptrsize_type
, NULL
, tmp_name
);
1942 addr_stmt
= gimple_build_assign (addr_tmp_name
, NOP_EXPR
, addr_base
);
1943 gimple_seq_add_stmt (cond_expr_stmt_list
, addr_stmt
);
1945 /* The addresses are OR together. */
1947 if (or_tmp_name
!= NULL_TREE
)
1949 /* create: or_tmp = or_tmp | addr_tmp */
1950 sprintf (tmp_name
, "orptrs%d", i
);
1951 new_or_tmp_name
= make_temp_ssa_name (int_ptrsize_type
, NULL
, tmp_name
);
1952 or_stmt
= gimple_build_assign (new_or_tmp_name
, BIT_IOR_EXPR
,
1953 or_tmp_name
, addr_tmp_name
);
1954 gimple_seq_add_stmt (cond_expr_stmt_list
, or_stmt
);
1955 or_tmp_name
= new_or_tmp_name
;
1958 or_tmp_name
= addr_tmp_name
;
1962 mask_cst
= build_int_cst (int_ptrsize_type
, mask
);
1964 /* create: and_tmp = or_tmp & mask */
1965 and_tmp_name
= make_temp_ssa_name (int_ptrsize_type
, NULL
, "andmask");
1967 and_stmt
= gimple_build_assign (and_tmp_name
, BIT_AND_EXPR
,
1968 or_tmp_name
, mask_cst
);
1969 gimple_seq_add_stmt (cond_expr_stmt_list
, and_stmt
);
1971 /* Make and_tmp the left operand of the conditional test against zero.
1972 if and_tmp has a nonzero bit then some address is unaligned. */
1973 ptrsize_zero
= build_int_cst (int_ptrsize_type
, 0);
1974 part_cond_expr
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1975 and_tmp_name
, ptrsize_zero
);
1977 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1978 *cond_expr
, part_cond_expr
);
1980 *cond_expr
= part_cond_expr
;
1983 /* Given two data references and segment lengths described by DR_A and DR_B,
1984 create expression checking if the two addresses ranges intersect with
1985 each other based on index of the two addresses. This can only be done
1986 if DR_A and DR_B referring to the same (array) object and the index is
1987 the only difference. For example:
1990 data-ref arr[i] arr[j]
1992 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1994 The addresses and their index are like:
1996 |<- ADDR_A ->| |<- ADDR_B ->|
1997 ------------------------------------------------------->
1999 ------------------------------------------------------->
2000 i_0 ... i_0+4 j_0 ... j_0+4
2002 We can create expression based on index rather than address:
2004 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2006 Note evolution step of index needs to be considered in comparison. */
2009 create_intersect_range_checks_index (loop_vec_info loop_vinfo
, tree
*cond_expr
,
2010 const dr_with_seg_len
& dr_a
,
2011 const dr_with_seg_len
& dr_b
)
2013 if (integer_zerop (DR_STEP (dr_a
.dr
))
2014 || integer_zerop (DR_STEP (dr_b
.dr
))
2015 || DR_NUM_DIMENSIONS (dr_a
.dr
) != DR_NUM_DIMENSIONS (dr_b
.dr
))
2018 if (!tree_fits_uhwi_p (dr_a
.seg_len
) || !tree_fits_uhwi_p (dr_b
.seg_len
))
2021 if (!tree_fits_shwi_p (DR_STEP (dr_a
.dr
)))
2024 if (!operand_equal_p (DR_BASE_OBJECT (dr_a
.dr
), DR_BASE_OBJECT (dr_b
.dr
), 0))
2027 if (!operand_equal_p (DR_STEP (dr_a
.dr
), DR_STEP (dr_b
.dr
), 0))
2030 gcc_assert (TREE_CODE (DR_STEP (dr_a
.dr
)) == INTEGER_CST
);
2032 bool neg_step
= tree_int_cst_compare (DR_STEP (dr_a
.dr
), size_zero_node
) < 0;
2033 unsigned HOST_WIDE_INT abs_step
2034 = absu_hwi (tree_to_shwi (DR_STEP (dr_a
.dr
)));
2036 unsigned HOST_WIDE_INT seg_len1
= tree_to_uhwi (dr_a
.seg_len
);
2037 unsigned HOST_WIDE_INT seg_len2
= tree_to_uhwi (dr_b
.seg_len
);
2038 /* Infer the number of iterations with which the memory segment is accessed
2039 by DR. In other words, alias is checked if memory segment accessed by
2040 DR_A in some iterations intersect with memory segment accessed by DR_B
2041 in the same amount iterations.
2042 Note segnment length is a linear function of number of iterations with
2043 DR_STEP as the coefficient. */
2044 unsigned HOST_WIDE_INT niter_len1
= (seg_len1
+ abs_step
- 1) / abs_step
;
2045 unsigned HOST_WIDE_INT niter_len2
= (seg_len2
+ abs_step
- 1) / abs_step
;
2048 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2049 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr_a
.dr
); i
++)
2051 tree access1
= DR_ACCESS_FN (dr_a
.dr
, i
);
2052 tree access2
= DR_ACCESS_FN (dr_b
.dr
, i
);
2053 /* Two indices must be the same if they are not scev, or not scev wrto
2054 current loop being vecorized. */
2055 if (TREE_CODE (access1
) != POLYNOMIAL_CHREC
2056 || TREE_CODE (access2
) != POLYNOMIAL_CHREC
2057 || CHREC_VARIABLE (access1
) != (unsigned)loop
->num
2058 || CHREC_VARIABLE (access2
) != (unsigned)loop
->num
)
2060 if (operand_equal_p (access1
, access2
, 0))
2065 /* The two indices must have the same step. */
2066 if (!operand_equal_p (CHREC_RIGHT (access1
), CHREC_RIGHT (access2
), 0))
2069 tree idx_step
= CHREC_RIGHT (access1
);
2070 /* Index must have const step, otherwise DR_STEP won't be constant. */
2071 gcc_assert (TREE_CODE (idx_step
) == INTEGER_CST
);
2072 /* Index must evaluate in the same direction as DR. */
2073 gcc_assert (!neg_step
|| tree_int_cst_sign_bit (idx_step
) == 1);
2075 tree min1
= CHREC_LEFT (access1
);
2076 tree min2
= CHREC_LEFT (access2
);
2077 if (!types_compatible_p (TREE_TYPE (min1
), TREE_TYPE (min2
)))
2080 /* Ideally, alias can be checked against loop's control IV, but we
2081 need to prove linear mapping between control IV and reference
2082 index. Although that should be true, we check against (array)
2083 index of data reference. Like segment length, index length is
2084 linear function of the number of iterations with index_step as
2085 the coefficient, i.e, niter_len * idx_step. */
2086 tree idx_len1
= fold_build2 (MULT_EXPR
, TREE_TYPE (min1
), idx_step
,
2087 build_int_cst (TREE_TYPE (min1
),
2089 tree idx_len2
= fold_build2 (MULT_EXPR
, TREE_TYPE (min2
), idx_step
,
2090 build_int_cst (TREE_TYPE (min2
),
2092 tree max1
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min1
), min1
, idx_len1
);
2093 tree max2
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min2
), min2
, idx_len2
);
2094 /* Adjust ranges for negative step. */
2097 min1
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min1
), max1
, idx_step
);
2098 max1
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min1
),
2099 CHREC_LEFT (access1
), idx_step
);
2100 min2
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min2
), max2
, idx_step
);
2101 max2
= fold_build2 (MINUS_EXPR
, TREE_TYPE (min2
),
2102 CHREC_LEFT (access2
), idx_step
);
2105 = fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
2106 fold_build2 (LE_EXPR
, boolean_type_node
, max1
, min2
),
2107 fold_build2 (LE_EXPR
, boolean_type_node
, max2
, min1
));
2109 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2110 *cond_expr
, part_cond_expr
);
2112 *cond_expr
= part_cond_expr
;
/* Given two data references and segment lengths described by DR_A and DR_B,
   create expression checking if the two address ranges intersect with
   each other based on the full addresses of the references:

     ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
     || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0)

   The check is stored in *COND_EXPR; it evaluates to true at runtime when
   the two segments do NOT overlap, i.e. when the vectorized version of the
   loop is safe to run.  */

static void
create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
			       const dr_with_seg_len &dr_a,
			       const dr_with_seg_len &dr_b)
{
  *cond_expr = NULL_TREE;
  /* Prefer the cheaper index-based check when it applies; it fills in
     *COND_EXPR itself and we are done.  */
  if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
    return;

  tree segment_length_a = dr_a.seg_len;
  tree segment_length_b = dr_b.seg_len;
  tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
  tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);

  /* Fold DR_INIT into the offsets so ADDR_BASE_* becomes the address of
     the first accessed element.  */
  offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
			  offset_a, DR_INIT (dr_a.dr));
  offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
			  offset_b, DR_INIT (dr_b.dr));
  addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
  addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);

  tree seg_a_min = addr_base_a;
  tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12).  The base then sits at the high end of the segment, so the
     min/max roles are swapped relative to the positive-step case.  */
  if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
      seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
      seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
    }

  tree seg_b_min = addr_base_b;
  tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
  /* Same adjustment for DR_B when its step is negative.  */
  if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
      seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
      seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
    }

  /* No overlap iff segment A ends at or before segment B starts, or
     vice versa.  */
  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
	fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
}
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data reference
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.  If it is NULL, then the function
                is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
                to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */

void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  tree part_cond_expr;

  /* Nothing to check: leave *COND_EXPR untouched.  */
  if (comp_alias_ddrs.is_empty ())
    return;

  for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
    {
      const dr_with_seg_len &dr_a = comp_alias_ddrs[i].first;
      const dr_with_seg_len &dr_b = comp_alias_ddrs[i].second;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "create runtime check for data references ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
	  dump_printf (MSG_NOTE, " and ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Create condition expression for each pair of data references,
	 AND-ing each new no-overlap test onto the accumulated check.  */
      create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created %u versioning for alias checks.\n",
		     comp_alias_ddrs.length ());
}
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned or/and
   has data reference relations whose independence was not proven then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pairs of DDRs whose
   independence was not proven.  The vectorized version of loop is
   executed only if both alias and alignment tests are passed.

   The test generated to check which version of loop is executed
   is modified to also check for profitability as indicated by the
   cost model threshold TH.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  */

void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  /* Branch to the vectorized version with 80% probability.  */
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
  bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);

  /* Start the versioning condition with the cost-model profitability
     test (niters > TH - 1), then AND on the niters, alignment and alias
     checks as required.  */
  if (check_profitability)
    cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
			     build_int_cst (TREE_TYPE (scalar_loop_iters),
					    th - 1));

  if (version_niter)
    vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);

  if (cond_expr)
    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					is_gimple_condexpr, NULL_TREE);

  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (version_alias)
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
	 scale LOOP's frequencies instead.  */
      nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
			    REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
      scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
	 while we need to move it above LOOP's preheader.  */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
		  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
		  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      /* Rewire the CFG so CONDITION_BB sits above LOOP's preheader and
	 falls through to SCALAR_LOOP's preheader on the false edge.  */
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
				      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      /* Keep the dominator tree consistent with the new edges.  */
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
			       single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
			       single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
			       condition_bb);
    }
  else
    nloop = loop_version (loop, cond_expr, &condition_bb,
			  prob, prob, REG_BR_PROB_BASE - prob, true);

  if (version_niter)
    {
      /* The versioned loop could be infinite, we need to clear existing
	 niter information which is copied from the original loop.  */
      gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
      vect_free_loop_info_assumptions (nloop);
      /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
      loop_constraint_set (loop, LOOP_C_INFINITE);
    }

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization because of "
			 "possible aliasing\n");
      if (version_align)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization to enhance "
			 "alignment\n");
    }
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from loop, but scalar_loop instead,
     merge_bb will have already just a single successor.  */

  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      /* Re-create loop-closed-form PHIs in the new exit block and make
	 the old PHIs in MERGE_BB use their results.  */
      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree new_res;
	  orig_phi = gsi.phi ();
	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  new_phi = create_phi_node (new_res, new_exit_bb);
	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  add_phi_arg (new_phi, arg, new_exit_e,
		       gimple_phi_arg_location_from_edge (orig_phi, e));
	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
	}
    }

  /* End loop-exit-fixes after versioning.  */

  if (cond_expr_stmt_list)
    {
      /* Materialize the statements computing the condition right before
	 the branch in CONDITION_BB.  */
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}