/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/

/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}

/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
              else
                continue;
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}

/* Data used by adjust_debug_stmts and friends: the SSA name FROM to be
   replaced by TO in debug stmts dominated by BB.  */

struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;

/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}

/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}

/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}

/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}

/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
                                     build_int_cst (TREE_TYPE (niters), 1));
}
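
/* Illustrative sketch (not generated verbatim): for NITERS = n_9 the old
   exit test of the loop is replaced by a fresh counter IV

       i'  = PHI <0 (preheader), i'' (latch)>
       ...
       i'' = i' + 1;
       if (i'' >= n_9)     ;; or "i'' < n_9" when the exit is the false edge
         goto exit_bb;

   and loop->nb_iterations is recorded as n_9 - 1, the latch count.  */
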
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest from those
   edges ensure that TO->dest PHI arguments have current_def
   to that in from.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi && to_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}

/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}

/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       int probability)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
  return new_e;
}
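
/* Illustrative use (a sketch only): to skip a loop whenever n_12 == 0 a
   caller would do roughly

       guard_e = slpeel_add_loop_guard (guard_bb,
                                        fold_build2 (EQ_EXPR,
                                                     boolean_type_node,
                                                     n_12, integer_zero_node),
                                        skip_to_bb, dom_bb, prob);

   which turns GUARD_BB's fall-through successor into the false edge of the
   new "if (n_12 == 0)" and returns the new true (skip) edge, which carries
   probability PROB while the fall-through keeps the inverse.  */
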
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.  */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}

/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies the checking what
   uses should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }
}

/* Function vect_get_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if succeed and NULL if not.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}

/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}

/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are not invariants in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}
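
/* Example of what is accepted (illustrative only): a simple IV
   i = {init, +, 1}_loop has evolution part 1, which is loop invariant and
   not itself a CHREC, so it passes.  An IV whose evolution part is again a
   CHREC, e.g. {0, +, {1, +, 1}_loop}_loop, is rejected with
   "evolution is chrec".  */
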
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
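
/* Worked example (illustrative): for an IV with initial value init_3 and
   step 4 that is live after the loop, and NITERS (here the number of scalar
   iterations covered by the vector loop) equal to n_7, the definition
   inserted in the exit bb is

       ni = init_3 + n_7 * 4;    ;; fold_build_pointer_plus for pointer IVs

   and the corresponding PHI argument on UPDATE_E is replaced by ni.  */
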
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the
   step (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector. This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not
   represent the number of elements that fit in the vector.  Therefore,
   instead of VF we use TYPE_VECTOR_SUBPARTS.  */

static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                              &stmts, offset,
                                                              loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign
        = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                       vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign
        = fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
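
/* Worked example (illustrative, assuming a 16-byte V4SI vector type):
   vectype_align = 16, elem_size = 4, nelements = 4.  For a runtime
   address 0x1008:
     byte_misalign = 0x1008 & 15  = 8
     elem_misalign = 8 >> 2       = 2
     iters         = (4 - 2) & 3  = 2
   so two scalar iterations are peeled and the access then starts at
   0x1010, which is 16-byte aligned.  */
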
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}

/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}

/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}

/* Calculate the number of iterations above which vectorized loop will be
   preferred than scalar loop.  NITERS_PROLOG is the number of iterations
   of prolog loop.  If it's integer const, the integer number is also passed
   in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (included) of
   number of iterations of prolog loop.  VFM1 is vector factor minus one.
   If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
   (rather than vectorized) loop will be executed.  This function stores
   upper bound (included) of the result in BOUND_SCALAR.  */

static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                           ? th
                           : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
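
/* Worked example (illustrative): VFM1 = 3, BOUND_PROLOG = 3, prolog count
   unknown (INT_NITERS_PROLOG < 0), CHECK_PROFITABILITY with TH = 5.
   After th--, th = 4; since 4 < vfm1 + bound_prolog (6) but 4 > vfm1 (3),
   the result is MAX (4, niters_prolog + 3) and *BOUND_SCALAR stays at
   vfm1 + bound_prolog = 6.  */
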
/* This function generates the following statements:

   niters = number of iterations loop executes (after peeling)
   niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */

static void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                  niters,
                                  build_one_cst (TREE_TYPE (niters)));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf) */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                 ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
                     fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                  fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                               ni_minus_gap,
                                               build_int_cst
                                                 (TREE_TYPE (niters), vf)),
                                  log_vf),
                     build_int_cst (TREE_TYPE (niters), 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (TREE_TYPE (niters), "bnd");
      gimple *stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *niters_vector_ptr = niters_vector;
}
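
/* Worked example (illustrative): vf = 4 and niters = 10, no peeling for
   gaps.  With NITERS_NO_OVERFLOW, niters_vector = 10 >> 2 = 2; otherwise
   ((10 - 4) >> 2) + 1 = 2 as well.  The remaining 10 - 2*4 = 2 iterations
   are left for the epilogue loop.  */
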
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */

static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
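
/* For instance (illustrative), with vf = 4 and niters_vector = n_8 the
   value stored is n_8 << 2, i.e. the number of scalar iterations actually
   covered by the vector loop.  */
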
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>;  ;; Use of i_0 to be replaced with i_x,
                               ;; or with i_2 if no LCSSA phi is created
                               ;; under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing argument on incoming edge with the
   result of newly created lcssa PHI nodes.  If CREATE_LCSSA_FOR_IV_PHIS
   is false, loop closed SSA phis will only be created for non-iv phis for
   the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute rest iterations of the first.  */

static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}

/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   are arranged as below:

       guard_bb:
         if (cond)
           goto merge_bb;
         else
           goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

       merge_bb:
         ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of new PHI result.  */

static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}

/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}

/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used out side EPILOG (i.e - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */

static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}

/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}

/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
         ...
         if (exit_loop_cond) goto exit_bb
         else                goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
                              CHECK_PROFITABILITY is true.

   Output:
   - NITERS_VECTOR: The number of iterations of loop after vectorization.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

       guard_bb_1:
         if (prefer_scalar_loop) goto merge_bb_1
         else                    goto guard_bb_2

       guard_bb_2:
         if (skip_prolog) goto merge_bb_2
         else             goto prolog_preheader

       prolog_preheader:
     PROLOG:
       prolog_header_bb:
         ...
         if (exit_prolog_cond) goto prolog_exit_bb
         else                  goto prolog_header_bb
       prolog_exit_bb:

       merge_bb_2:

       vector_preheader:
     VECTOR LOOP:
       vector_header_bb:
         ...
         if (exit_vector_cond) goto vector_exit_bb
         else                  goto vector_header_bb
       vector_exit_bb:

       guard_bb_3:
         if (skip_epilog) goto merge_bb_3
         else             goto epilog_preheader

       merge_bb_1:

       epilog_preheader:
     EPILOG:
       epilog_header_bb:
         ...
         if (exit_epilog_cond) goto merge_bb_3
         else                  goto epilog_header_bb

       merge_bb_3:

   Note this function peels prolog and epilog only if it's necessary,
   as well as guards.
   Returns created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */

struct loop *
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
                 tree *niters_vector, int th, bool check_profitability,
                 bool niters_no_overflow)
{
  edge e, guard_e;
  tree type = TREE_TYPE (niters), guard_cond;
  basic_block guard_bb, guard_to;
  int prob_prolog, prob_vector, prob_epilog;
  int bound_prolog = 0, bound_scalar = 0, bound = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
  bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
                         || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));

  if (!prolog_peeling && !epilog_peeling)
    return NULL;

  prob_vector = 9 * REG_BR_PROB_BASE / 10;
  if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
    vf = 3;
  prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *first_loop = loop;
  create_lcssa_for_virtual_phi (loop);
  update_ssa (TODO_update_ssa_only_virtuals);

  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }
  initialize_original_copy_tables ();

  /* Prolog loop may be skipped.  */
  bool skip_prolog = (prolog_peeling != 0);
  /* Skip to epilog if scalar loop may be preferred.  It's only used when
     we peel for epilog loop.  */
  bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* Epilog loop must be executed if the number of iterations for epilog
     loop is known at compile time, otherwise we need to add a check at
     the end of vector loop and skip to the end of epilog loop.  */
  bool skip_epilog = (prolog_peeling < 0
                      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    skip_epilog = false;

  /* Record the anchor bb at which guard should be placed if scalar loop
     may be preferred.  */
  basic_block anchor = loop_preheader_edge (loop)->src;
  if (skip_vector)
    split_edge (loop_preheader_edge (loop));

  tree niters_prolog = build_int_cst (type, 0);
  source_location loop_loc = find_loop_location (loop);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  if (prolog_peeling)
    {
      e = loop_preheader_edge (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "loop can't be duplicated to preheader edge.\n");
          gcc_unreachable ();
        }
      /* Peel prolog and put it on preheader edge of loop.  */
      prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!prolog)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
          gcc_unreachable ();
        }
      slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
      first_loop = prolog;
      reset_original_copy_tables ();

      /* Generate and update the number of iterations for prolog loop.  */
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
                                                   &bound_prolog);
      slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);

      /* Skip the prolog loop.  */
      if (skip_prolog)
        {
          guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                    niters_prolog, build_int_cst (type, 0));
          guard_bb = loop_preheader_edge (prolog)->src;
          guard_to = split_edge (loop_preheader_edge (loop));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
                                           guard_to, guard_bb,
                                           inverse_probability (prob_prolog));
          e = EDGE_PRED (guard_to, 0);
          e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
          slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
          scale_loop_profile (prolog, prob_prolog, bound_prolog);
        }
      /* Update init address of DRs.  */
      vect_update_inits_of_drs (loop_vinfo, niters_prolog);
      /* Update niters for vector loop.  */
      LOOP_VINFO_NITERS (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
      LOOP_VINFO_NITERSM1 (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type,
                       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
      niters = vect_build_loop_niters (loop_vinfo);

      /* Prolog iterates at most bound_prolog times, latch iterates at
         most bound_prolog - 1 times.  */
      record_niter_bound (prolog, bound_prolog - 1, false, true);
      delete_update_ssa ();
      adjust_vec_debug_stmts ();
    }

  if (epilog_peeling)
    {
      e = single_exit (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "loop can't be duplicated to exit edge.\n");
          gcc_unreachable ();
        }
      /* Peel epilog and put it on exit edge of loop.  */
      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!epilog)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
          gcc_unreachable ();
        }
      slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);

      /* Scalar version loop may be preferred.  In this case, add guard
         and skip to epilog.  Note this only happens when the number of
         iterations of loop is unknown at compile time, otherwise this
         won't be vectorized.  */
      if (skip_vector)
        {
          /* Additional epilogue iteration is peeled if gap exists.  */
          bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
          tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
                                                bound_prolog,
                                                peel_for_gaps ? vf : vf - 1,
                                                th, &bound_scalar,
                                                check_profitability);
          /* Build guard against NITERSM1 since NITERS may overflow.  */
          guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
          guard_bb = anchor;
          guard_to = split_edge (loop_preheader_edge (epilog));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
                                           guard_to, guard_bb,
                                           inverse_probability (prob_vector));
          e = EDGE_PRED (guard_to, 0);
          e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
          slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
          scale_loop_profile (epilog, prob_vector, bound_scalar);
        }

      tree niters_vector_mult_vf;
      /* If loop is peeled for non-zero constant times, now niters refers to
         orig_niters - prolog_peeling, it won't overflow even the orig_niters
         overflows.  */
      niters_no_overflow |= (prolog_peeling > 0);
      vect_gen_vector_loop_niters (loop_vinfo, niters,
                                   niters_vector, niters_no_overflow);
      vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
                                           &niters_vector_mult_vf);
      /* Update IVs of original loop as if they were advanced by
         niters_vector_mult_vf steps.  */
      gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
      edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
      vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
                                        update_e);

      if (skip_epilog)
        {
          guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                    niters, niters_vector_mult_vf);
          guard_bb = single_exit (loop)->dest;
          guard_to = split_edge (single_exit (epilog));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
                                           skip_vector ? anchor : guard_bb,
                                           inverse_probability (prob_epilog));
          slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
                                              single_exit (epilog));
          scale_loop_profile (epilog, prob_epilog, bound);
        }
      else
        slpeel_update_phi_nodes_for_lcssa (epilog);

      bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
      /* We share epilog loop with scalar version loop.  */
      bound = MAX (bound, bound_scalar - 1);
      record_niter_bound (epilog, bound, false, true);

      delete_update_ssa ();
      adjust_vec_debug_stmts ();
    }

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_vec.release ();
  free_original_copy_tables ();

  return epilog;
}

/* Function vect_create_cond_for_niters_checks.

   Create a conditional expression that represents the run-time checks for
   loop's niter.  The loop is guaranteed to terminate if the run-time
   checks hold.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
                under which the niter expression is valid.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the
   if statement that controls which version of the loop gets executed at
   runtime.  */

static void
vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
{
  tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);

  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                              *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}

/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment must
   be checked at runtime.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
		LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
		LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
			 expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and that this
	test can be done as a&(n-1) == 0.  For example, for 16
	byte vectors the test is a&0xf == 0.  */
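/* As an illustration, for 16-byte vectors (mask == 0xf) and two data
   references whose first-vector addresses are addr1 and addr2, the code
   below emits roughly:

	addr2int0 = (signed ptrsize int) addr1;
	addr2int1 = (signed ptrsize int) addr2;
	orptrs1   = addr2int0 | addr2int1;
	andmask   = orptrs1 & 0xf;
	cond      = (andmask == 0);

   so a single test covers the alignment of all the references.  */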
static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
				   tree *cond_expr,
				   gimple_seq *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  vec<gimple *> may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  gimple *ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple *and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask + 1)) == 0));

  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the
     address of the first vector of the i'th data reference.  */

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree new_or_tmp_name;
      gimple *addr_stmt, *or_stmt;
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      bool negative = tree_int_cst_compare
	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
      tree offset = negative
	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
					      offset, loop);
      if (new_stmt_list != NULL)
	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
	{
	  /* create: or_tmp = or_tmp | addr_tmp */
	  sprintf (tmp_name, "orptrs%d", i);
	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type,
						NULL, tmp_name);
	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
					 or_tmp_name, addr_tmp_name);
	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
	  or_tmp_name = new_or_tmp_name;
	}
      else
	or_tmp_name = addr_tmp_name;
    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
				  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
				and_tmp_name, ptrsize_zero);
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}
/* Given two data references and segment lengths described by DR_A and DR_B,
   create expression checking if the two address ranges intersect with
   each other based on index of the two addresses.  This can only be done
   if DR_A and DR_B refer to the same (array) object and the index is the
   only difference.  For example:

                       DR_A                   DR_B
      data-ref         arr[i]                 arr[j]
      base_object      arr                    arr
      index            {i_0, +, 1}_loop       {j_0, +, 1}_loop

   The addresses and their index are like:

	|<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
	|   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
	i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create expression based on index rather than address:

     (i_0 + 4 < j_0 || j_0 + 4 < i_0)

   Note evolution step of index needs to be considered in comparison.  */
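/* For the example above, with 4 covered iterations and index step 1, the
   ranges used below are roughly [i_0, i_0 + 4) and [j_0, j_0 + 4), and the
   condition actually emitted is (i_0 + 4 <= j_0 || j_0 + 4 <= i_0).  */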
static bool
create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
				     const dr_with_seg_len& dr_a,
				     const dr_with_seg_len& dr_b)
{
  if (integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step
    = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));

  unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
  unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if the memory segment accessed
     by DR_A in some iterations intersects with the memory segment accessed
     by DR_B in the same number of iterations.
     Note segment length is a linear function of the number of iterations
     with DR_STEP as the coefficient.  */
  unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
  unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
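  /* The divisions above round up; e.g. a segment length of 16 bytes with an
     absolute step of 4 bytes yields niter_len = (16 + 4 - 1) / 4 = 4.  */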
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrt
	 the current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned) loop->num
	  || CHREC_VARIABLE (access2) != (unsigned) loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      /* The two indices must have the same step.  */
      if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
	return false;

      tree idx_step = CHREC_RIGHT (access1);
      /* Index must have const step, otherwise DR_STEP won't be constant.  */
      gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
      /* Index must evaluate in the same direction as DR.  */
      gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

      tree min1 = CHREC_LEFT (access1);
      tree min2 = CHREC_LEFT (access2);
      if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
	return false;

      /* Ideally, alias can be checked against loop's control IV, but we
	 need to prove linear mapping between control IV and reference
	 index.  Although that should be true, we check against (array)
	 index of data reference.  Like segment length, index length is
	 a linear function of the number of iterations with index_step as
	 the coefficient, i.e., niter_len * idx_step.  */
      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
				   build_int_cst (TREE_TYPE (min1),
						  niter_len1));
      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
				   build_int_cst (TREE_TYPE (min2),
						  niter_len2));
      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
      /* Adjust ranges for negative step.  */
      if (neg_step)
	{
	  min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
	  max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
			      CHREC_LEFT (access1), idx_step);
	  min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
	  max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
			      CHREC_LEFT (access2), idx_step);
	}
      tree part_cond_expr
	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		       fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
		       fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  return true;
}
/* Given two data references and segment lengths described by DR_A and DR_B,
   create expression checking if the two address ranges intersect with
   each other:

     ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
     || (DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0))  */
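/* For instance, for two 16-byte segments starting at &a[0] and &b[0], the
   condition built below is roughly

     (&a[0] + 16 <= &b[0]) || (&b[0] + 16 <= &a[0])

   i.e. it holds exactly when the two ranges do not overlap.  */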
static void
create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
			       const dr_with_seg_len& dr_a,
			       const dr_with_seg_len& dr_b)
{
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
    return;

  tree segment_length_a = dr_a.seg_len;
  tree segment_length_b = dr_b.seg_len;
  tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
  tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);

  offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
			  offset_a, DR_INIT (dr_a.dr));
  offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
			  offset_b, DR_INIT (dr_b.dr));
  addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
  addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);

  tree seg_a_min = addr_base_a;
  tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12).  */
  if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
      seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
      seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
    }
  tree seg_b_min = addr_base_b;
  tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
  if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
      seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
      seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
    }
  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		   fold_build2 (LE_EXPR, boolean_type_node,
				seg_a_max, seg_b_min),
		   fold_build2 (LE_EXPR, boolean_type_node,
				seg_b_max, seg_a_min));
}
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data reference
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.  If it is NULL, then the function
		is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
		to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */
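/* For instance, with two may-alias pairs (a,b) and (a,c), *COND_EXPR ends up
   with the shape

     <a and b do not overlap> && <a and c do not overlap>

   where each term is produced by create_intersect_range_checks.  */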
void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  tree part_cond_expr;

  if (comp_alias_ddrs.is_empty ())
    return;

  for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
    {
      const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
      const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "create runtime check for data references ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
	  dump_printf (MSG_NOTE, " and ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Create condition expression for each pair of data references.  */
      create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created %u versioning for alias checks.\n",
		     comp_alias_ddrs.length ());
}
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned or/and
   has data reference relations whose independence was not proven then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both alias and alignment tests are passed.

   The test generated to check which version of the loop is executed
   is modified to also check for profitability as indicated by the
   cost model threshold TH.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  */
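/* The generated control flow has roughly this shape:

     if (<niters/profitability> && <alignment checks> && <alias checks>)
       <loop to be vectorized>
     else
       <scalar copy of the loop>

   with the conditions built by the vect_create_cond_for_* helpers above and
   gimplified into *COND_EXPR and *COND_EXPR_STMT_LIST.  */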
void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
  bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);

  if (check_profitability)
    cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
			     build_int_cst (TREE_TYPE (scalar_loop_iters),
					    th - 1));

  if (version_niter)
    vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);

  if (cond_expr)
    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					is_gimple_condexpr, NULL_TREE);

  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (version_alias)
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
	 scale LOOP's frequencies instead.  */
      nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
			    REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
      scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
	 while we need to move it above LOOP's preheader.  */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
		  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
		  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
				      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
			       single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
			       single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
			       single_pred (preheader));
    }
  else
    nloop = loop_version (loop, cond_expr, &condition_bb,
			  prob, prob, REG_BR_PROB_BASE - prob, true);

  if (version_niter)
    {
      /* The versioned loop could be infinite, we need to clear existing
	 niter information which is copied from the original loop.  */
      gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
      vect_free_loop_info_assumptions (nloop);
      /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
      loop_constraint_set (loop, LOOP_C_INFINITE);
    }

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization because of "
			 "possible aliasing\n");
      if (version_align)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization to enhance "
			 "alignment\n");
    }
  free_original_copy_tables ();
  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from loop, but scalar_loop instead,
     merge_bb will have already just a single successor.  */

  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree new_res;
	  orig_phi = gsi.phi ();
	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  new_phi = create_phi_node (new_res, new_exit_bb);
	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  add_phi_arg (new_phi, arg, new_exit_e,
		       gimple_phi_arg_location_from_edge (orig_phi, e));
	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
	}
    }

  /* End loop-exit-fixes after versioning.  */

  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}